Setup

knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, cache = TRUE)
knitr::opts_knit$set(root.dir = '/Volumes/JARC_2T/NSF-CREST_Postdoc/LongTerm_Hypoxia_exp/AplCal_hypoxia/TagSeq/')
# Load libraries
library(DESeq2)    # BiocManager::install('DESeq2')
library(limma)     # BiocManager::install('limma')
library(stringr)
library(readxl)
library(pheatmap)
library(RColorBrewer)
library(variancePartition)  # BiocManager::install('variancePartition')
library(doParallel)
library(cowplot)
source("analyses/GO_MWU/gomwu.functions.R")
library(tidyverse)
library(KOGMWU)  # install.packages("KOGMWU")
library(adegenet)
library(ggpubr)
library(kableExtra)
library(tidyverse)
library(KOGMWU)  # install.packages("KOGMWU")
library(adegenet)
library(ggpubr)
library(kableExtra)
library(ggfortify) # install.packages("ggfortify")
library(Biobase)
library(arrayQualityMetrics)
library(vegan)

## ggplot theme
theme_custom <- function() {
  theme_bw(base_size = 10) %+replace%    #, base_family = "Arial"
    theme(
      panel.grid.major = element_blank(), 
      panel.grid.minor = element_blank(), 
      panel.background = element_blank(),
      panel.border = element_rect(color = "black", fill = NA),
      legend.background = element_rect(fill = NA, colour = NA),
      axis.text.x = element_text(angle=45, hjust=1, vjust = 1)
    )
}
## ggplot labeller
colnames <- c(
  `A` = "abdominal",
  `Pp` = "pleural & pedal"
)

parents <- c(
  `60` = "lab-reared",
  `71` = "wild"
)

global_labeller <- labeller(
  tissue = colnames,
  batch = parents,
  .default = "label_parsed"
)

Load data

Gene count data

#name <- gsub( "(?:[^_]+_){4}([^_ ]+)*$","", files)
  
  # STAR quantMode geneCounts output:
  #column 1: gene ID
  #column 2: counts for unstranded RNA-seq
  #column 3: counts for the 1st read strand aligned with RNA (htseq-count option -s yes)
  #column 4: counts for the 2nd read strand aligned with RNA (htseq-count option -s reverse)
  
sampleNames <- list.files(path = glue::glue(getwd(), "/data/GeneCounts"), pattern = "*.ReadsPerGene.out.tab") %>%
    stringr::str_split_fixed("_", n = 4) %>%
    tibble::as_tibble() %>%
    dplyr::mutate(Name = V1) %>%
    dplyr::select(Name) %>% 
    purrr::flatten_chr()
  
geneIDs <- list.files(path = glue::glue(getwd(), "/data/GeneCounts"), pattern = "*.ReadsPerGene.out.tab", full.names = T)[1] %>% 
    data.table::fread(select = 1) %>%
    purrr::flatten_chr()
  
countMatrix <- list.files(path = glue::glue(getwd(), "/data/GeneCounts"), pattern = "*.ReadsPerGene.out.tab", full.names = T) %>%
    purrr::map_dfc(data.table::fread, select = 3, data.table = F) %>%
    magrittr::set_colnames(sampleNames) %>% 
    magrittr::set_rownames(geneIDs)
  
countMatrix <- countMatrix[-c(1:4),]

Sample metadata

# Import sample metadata
sdat0 <- read_csv("data/sample_metadata.csv") 
sdat0$treatment <- gsub("C", "Control", sdat0$treatment)
sdat0 <- sdat0 %>%  
  mutate(tissue.group = interaction(tissue, treatment)) %>%
  mutate(batch.group = interaction(batch, treatment)) %>%
  mutate_if(is.character, as.factor)

sdat <- sdat0 %>%
  arrange(sample_ID) %>%                              # order rows by sample name
  column_to_rownames(var = "sample_ID")               # set sample name to rownames

save(sdat,sdat0,countMatrix, file = "output/counts_and_meta.RData")

#sdat$treatment <- factor(sdat$treatment, levels = c("Control","Hyp_T2h","Hyp_T6h","LtH_6","LtH_7","Reox","Recovery"), ordered = T)
#sdat$group <- paste(sdat$treatment, sdat$batch, sdat$tissue, sep = "")

Gene annotations

annot <- read.delim("../../../Reference_Datasets/annotations_processed/AplCal3_gene2tx2prot2hs_uniprot.txt")
head(annot)
##           gene           tx         prot   evalue UNIPROT Hs_Gene
## 1 LOC101845732 XM_005088767 XP_005088824 8.47e-16  P12074   CX6A1
## 2 LOC101845963 XM_005088769 XP_005088826 0.00e+00  O00400   ACATN
## 3 LOC101845963 XM_005088768 XP_005088825 0.00e+00  O00400   ACATN
## 4 LOC101847001 XM_005088773 XP_005088830 1.12e-13  P50461   CSRP3
## 5 LOC101847001 XM_005088773 XP_005088830 2.07e-13  P50461   CSRP3
## 6 LOC101855301 XM_005088905 XP_005088962 5.21e-42  Q6UVY6   MOXD1
##       tx_version   prot_version
## 1 XM_005088767.1 XP_005088824.1
## 2 XM_005088769.3 XP_005088826.1
## 3 XM_005088768.3 XP_005088825.1
## 4 XM_005088773.3 XP_005088830.1
## 5 XM_005088773.3 XP_005088830.1
## 6 XM_005088905.3 XP_005088962.1
gene2uniprot <- annot[,c(1,5)]
write_delim(gene2uniprot,"analyses/annotation/AplCal3_gene2uniprot.txt", delim = "\t")
write(gene2uniprot$UNIPROT,"analyses/annotation/AplCal3_uniprot.txt")

uni2go <- read.delim("../../../Reference_Datasets/annotations_processed/uniprot-compressed_true_download_true_fields_accession_2Cid_2Cprotei-2022.09.29-19.04.25.82.tsv")
colnames(uni2go)[2] <- "UNIPROT"

gene2go <- merge(gene2uniprot, uni2go, by = "UNIPROT", all.x = T)
head(gene2go)
##      UNIPROT         gene       From  Entry.Name
## 1 A0A075B6T6 LOC101852117 A0A075B6T6 TVAL2_HUMAN
## 2 A0A0B4J245 LOC101852547 A0A0B4J245 TVAL1_HUMAN
## 3 A0A0U1RQI7 LOC101858283 A0A0U1RQI7 KLF18_HUMAN
## 4 A0A0U1RQI7 LOC101858283 A0A0U1RQI7 KLF18_HUMAN
## 5 A0A0U1RQI7 LOC101858283 A0A0U1RQI7 KLF18_HUMAN
## 6 A0A0U1RQI7 LOC101858283 A0A0U1RQI7 KLF18_HUMAN
##                         Protein.names Gene.Names
## 1 T cell receptor alpha variable 12-2   TRAV12-2
## 2 T cell receptor alpha variable 12-1   TRAV12-1
## 3              Kruppel-like factor 18      KLF18
## 4              Kruppel-like factor 18      KLF18
## 5              Kruppel-like factor 18      KLF18
## 6              Kruppel-like factor 18      KLF18
##                                            Gene.Ontology.IDs
## 1                         GO:0002250; GO:0042101; GO:0042605
## 2                         GO:0002250; GO:0042101; GO:0042605
## 3 GO:0000978; GO:0000981; GO:0005634; GO:0006357; GO:0046872
## 4 GO:0000978; GO:0000981; GO:0005634; GO:0006357; GO:0046872
## 5 GO:0000978; GO:0000981; GO:0005634; GO:0006357; GO:0046872
## 6 GO:0000978; GO:0000981; GO:0005634; GO:0006357; GO:0046872
gene2go <- gene2go[,c(2,7)]

write_delim(gene2go,"analyses/GO_MWU/seq2go.tab", delim = "\t")

Create DESeqDataSet

# Create full DESeqDataSet
dds <- DESeqDataSetFromMatrix(countData = countMatrix,
                              colData = sdat,
                              design = ~ tissue)
### Remove genes counted zero times
dds <- dds[ rowSums(counts(dds)) > 0, ]

# Subset DESeqDataSet
## Subset for batch and ganglia
dds_60 <- dds[, colData(dds)$batch == 60] 
dds_60A <- dds_60[, colData(dds_60)$tissue == "A"]
dds_60Pp <- dds_60[, colData(dds_60)$tissue == "Pp"]

dds_71 <- dds[, colData(dds)$batch == 71] 
dds_71A <- dds_71[, colData(dds_71)$tissue == "A"]
dds_71Pp <- dds_71[, colData(dds_71)$tissue == "Pp"]

### Remove genes counted zero times in subset
dds_60 <- dds_60[ rowSums(counts(dds_60)) > 0, ]
dds_60A <- dds_60A[ rowSums(counts(dds_60A)) > 0, ]
dds_60Pp <- dds_60Pp[ rowSums(counts(dds_60Pp)) > 0, ]

dds_71 <- dds_71[ rowSums(counts(dds_71)) > 0, ]
dds_71A <- dds_71A[ rowSums(counts(dds_71A)) > 0, ]
dds_71Pp <- dds_71Pp[ rowSums(counts(dds_71Pp)) > 0, ]

dds_A <- dds[, colData(dds)$tissue == "A"] 
dds_Pp <- dds[, colData(dds)$tissue == "Pp"] 

### Drop unused factor levels
colData(dds_60A) <- droplevels(colData(dds_60A))
colData(dds_60Pp) <- droplevels(colData(dds_60Pp))

colData(dds_71A) <- droplevels(colData(dds_71A))
colData(dds_71Pp) <- droplevels(colData(dds_71Pp))

colData(dds_A) <- droplevels(colData(dds_A))
colData(dds_Pp) <- droplevels(colData(dds_Pp))

Summarize count data

# Read in count totals of raw, postQC, and mapped reads
count_summ <- read.table("data/count_totals.txt") %>%
  mutate(sample_ID = str_sub(V1, 1, 7),
         raw = V2, post_qc = V3, mapped = V4) %>%
  select(sample_ID, raw, post_qc, mapped) %>%
  gather(stage, count, -sample_ID) 

# Summarize counts for only samples included in paper (sampled 2017-10-28)
counttable <- sdat0 %>%
  left_join(count_summ) %>%
  group_by(stage) %>%
  summarize(`Min. (per sample)` = min(count),
            `Max. (per sample)` = max(count),
            `Median (per sample)` = median(count),
            Total = sum(count)) %>%
  mutate_if(is_numeric, ~ format(ceiling(.), big.mark = ",")) %>%
  arrange(rev(stage))
 
counttable <- counttable[-1,]
counttable %>%
   knitr::kable(caption = "Total read counts")
Total read counts
stage Min. (per sample) Max. (per sample) Median (per sample) Total
raw 3,744,475 14,723,150 10,642,582 449,914,831
post_qc 2,868,631 12,341,999 8,423,182 351,314,325
mapped 2,026,328 9,255,139 5,972,638 250,056,187
# Number of samples
nsamples <- ncol(counts(dds))

# Number of reads per sample
rps <- qplot(colSums(counts(dds))) +
  labs(x = "Mapped reads per sample", y = "Number of samples",
       title = "Mapped reads per sample") +
  geom_label(aes(x = 6e5, y = 13, label = paste(nsamples, "samples")))

# Number of genes
ngenes <- nrow(counts(dds))
# Number of reads per gene
rpg <- qplot(log10(rowSums(counts(dds))), bins = 16) +
  labs(x = "log10(Mapped reads per gene)", y = "Number of genes",
       title = "Mapped reads per gene") +
  geom_label(aes(x = 4, y = 1500, label = paste(ngenes, "genes")))

countfig <- plot_grid(rps, rpg)

countfig

# Save table and fig to include in supplemental information
write_csv(counttable, path = "output/readcounttable.csv")
ggsave(countfig, filename = "figures/FigS1_TagSeq_seq_Stats.png", device = "png", width = 150, height = 85, units = "mm")

Filter and transform count data

# Remove genes with less than 1 mean count across samples
dds <- dds[ rowMeans(counts(dds)) > 1, ]
dds_60 <- dds_60[ rowMeans(counts(dds_60)) > 1, ]
dds_60A <- dds_60A[ rowMeans(counts(dds_60A)) > 1, ]
dds_60Pp <- dds_60Pp[ rowMeans(counts(dds_60Pp)) > 1, ]

dds_71 <- dds_71[ rowMeans(counts(dds_71)) > 1, ]
dds_71A <- dds_71A[ rowMeans(counts(dds_71A)) > 1, ]
dds_71Pp <- dds_71Pp[ rowMeans(counts(dds_71Pp)) > 1, ]

dds_A <- dds_A[ rowMeans(counts(dds_A)) > 1, ]
dds_Pp <- dds_Pp[ rowMeans(counts(dds_Pp)) > 1, ]

# Normalize expression data for visualization purposes using VST tranformation
vsd <- vst(dds, blind = TRUE)
vsd_60 <- vst(dds_60, blind = TRUE)
vsd_60A <- vst(dds_60A, blind = TRUE)
vsd_60Pp <- vst(dds_60Pp, blind = TRUE)

vsd_71 <- vst(dds_71, blind = TRUE)
vsd_71A <- vst(dds_71A, blind = TRUE)
vsd_71Pp <- vst(dds_71Pp, blind = TRUE)

vsd_A <- vst(dds_A, blind = TRUE)
vsd_Pp <- vst(dds_Pp, blind = TRUE)

Visualize transcriptome

Principal Coordinates Analysis

## Calculate distances among samples
sampleDists <- dist(t(assay(vsd)), method = "manhattan")
sampleDistMatrix <- as.matrix(sampleDists)

## Calculate MDS
mds <- as.data.frame(colData(vsd)) %>% 
  cbind(cmdscale(sampleDistMatrix)) %>%
  mutate(fillcol = factor(ifelse(tissue == "A", NA, tissue))) %>% 
  mutate(alphaval = ifelse(batch == 60, 1, 0))

# Calculate group centroids for plotting
mds <- mds %>%
  group_by(tissue, batch, treatment) %>%
  dplyr::summarise(c1 = mean(`1`), c2 = mean(`2`)) %>%    
  full_join(mds)

# Calculate variance explained by each PC
MDS <- cmdscale(sampleDistMatrix, eig = TRUE)
vexpl <- round(MDS$eig*100/sum(MDS$eig),1)[1:2]

# Plot with spiders
pcoa <- ggplot(mds, aes(color = treatment, shape = tissue)) +
  geom_segment(mapping = aes(x = `1`, y = `2`, xend = c1, yend = c2),
               lwd = 0.25, col = "grey") +
  geom_point(size = 2, aes(x = c1, y = c2, fill = treatment, alpha = as.character(alphaval))) +
  geom_point(size = 2, aes(x = c1, y = c2), fill = ifelse(mds$alphaval, "white", NA)) +
  geom_point(size = 0.7, aes(x = `1`, y = `2`, fill = treatment, alpha = as.character(alphaval)), show.legend = F) +
  geom_point(size = 0.7, aes(x = `1`, y = `2`), fill = ifelse(mds$alphaval, "white", NA), show.legend = F) +
  scale_shape_manual(values = c(21, 24), name = "ganglia", labels = c("abdominal", "pleural & pedal")) +
  scale_alpha_manual(values = c(1, 0), name = "batch", labels = c("lab_reared parents", "wild parents")) +
  guides(shape = guide_legend(order = 2, override.aes = list(fill = "black"))) +
  guides(alpha = guide_legend(override.aes = list(shape = 21, alpha = 1, fill = c("black", "white")))) +
  labs(x = paste0("PC1 [", vexpl[1],"%]"), y = paste0("PC2 [", vexpl[2],"%]")) +
  theme_custom() +
  theme(legend.spacing.y = unit(0, "cm"))

pcoa

ggsave(filename = "figures/Fig1.pcoa_GE.png",pcoa, width = 160, height = 110, units = "mm")

The PCoA shows that global gene expression varies significantly by batch along PC1 and by ganglia along PC2.In order to see clear treatment effects I will perform PCoAs for each batch and ganglia

## Calculate distances among samples
sampleDists <- dist(t(assay(vsd_60)), method = "manhattan")
sampleDistMatrix <- as.matrix(sampleDists)

## Calculate MDS
mds <- as.data.frame(colData(vsd_60)) %>% 
  cbind(cmdscale(sampleDistMatrix)) %>%
  mutate(fillcol = factor(ifelse(tissue == "A", NA, tissue)))

# Calculate group centroids for plotting
mds <- mds %>%
  group_by(tissue, treatment) %>%
  dplyr::summarise(c1 = mean(`1`), c2 = mean(`2`)) %>%    
  full_join(mds)

# Calculate variance explained by each PC
MDS <- cmdscale(sampleDistMatrix, eig = TRUE)
vexpl <- round(MDS$eig*100/sum(MDS$eig),1)[1:2]

# Plot with spiders
pcoa <- ggplot(mds, aes(color = treatment, shape = tissue)) +
  geom_segment(mapping = aes(x = `1`, y = `2`, xend = c1, yend = c2), lwd = 0.25, col = "grey") +
  geom_point(size = 2, aes(x = c1, y = c2), show.legend = FALSE) +
  geom_point(size = 0.7, aes(x = `1`, y = `2`)) +
  scale_shape_discrete(name = "ganglia", labels = c("abdominal", "pleural & pedal")) +
  scale_color_discrete(name = "treatment") +
  labs(x = paste0("PC1 [", vexpl[1],"%]"), y = paste0("PC2 [", vexpl[2],"%]")) +
  theme_custom() +
  theme(legend.spacing = unit(0, "cm"), legend.key.size = unit(0.7, 'lines')) +
  guides(fill=guide_legend(ncol=2)) 

pcoa

ggsave(filename = "figures/Fig.pcoa_batch60_GE.png",pcoa, width = 160, height = 110, units = "mm")
## Calculate distances among samples
sampleDists <- dist(t(assay(vsd_71)), method = "manhattan")
sampleDistMatrix <- as.matrix(sampleDists)

## Calculate MDS
mds <- as.data.frame(colData(vsd_71)) %>% 
  cbind(cmdscale(sampleDistMatrix)) %>%
  mutate(fillcol = factor(ifelse(tissue == "A", NA, tissue)))

# Calculate group centroids for plotting
mds <- mds %>%
  group_by(tissue, treatment) %>%
  dplyr::summarise(c1 = mean(`1`), c2 = mean(`2`)) %>%    
  full_join(mds)

# Calculate variance explained by each PC
MDS <- cmdscale(sampleDistMatrix, eig = TRUE)
vexpl <- round(MDS$eig*100/sum(MDS$eig),1)[1:2]

# Plot with spiders
pcoa <- ggplot(mds, aes(color = treatment, shape = tissue)) +
  geom_segment(mapping = aes(x = `1`, y = `2`, xend = c1, yend = c2), lwd = 0.25, col = "grey") +
  geom_point(size = 2, aes(x = c1, y = c2), show.legend = FALSE) +
  geom_point(size = 0.7, aes(x = `1`, y = `2`)) +
  scale_shape_discrete(name = "ganglia", labels = c("abdominal", "pleural & pedal")) +
  scale_color_discrete(name = "treatment") +
  labs(x = paste0("PC1 [", vexpl[1],"%]"), y = paste0("PC2 [", vexpl[2],"%]")) +
  theme_custom() +
  theme(legend.spacing = unit(0, "cm"), legend.key.size = unit(0.7, 'lines')) +
  guides(fill=guide_legend(ncol=2)) 

pcoa

ggsave(filename = "figures/Fig.pcoa_batch71_GE.png",pcoa, width = 171, height = 110, units = "mm")
## Calculate distances among samples
sampleDists <- dist(t(assay(vsd_A)), method = "manhattan")
sampleDistMatrix <- as.matrix(sampleDists)

## Calculate MDS
mds <- as.data.frame(colData(vsd_A)) %>% 
  cbind(cmdscale(sampleDistMatrix)) %>%
  mutate(btch = factor(batch))

# Calculate group centroids for plotting
mds <- mds %>%
  group_by(batch, treatment) %>%
  dplyr::summarise(c1 = mean(`1`), c2 = mean(`2`)) %>%    
  full_join(mds)

# Calculate variance explained by each PC
MDS <- cmdscale(sampleDistMatrix, eig = TRUE)
vexpl <- round(MDS$eig*100/sum(MDS$eig),1)[1:2]

# Plot with spiders
pcoa <- ggplot(mds, aes(color = treatment, shape = btch)) +
  geom_segment(mapping = aes(x = `1`, y = `2`, xend = c1, yend = c2), lwd = 0.25, col = "grey") +
  geom_point(size = 2, aes(x = c1, y = c2), show.legend = FALSE) +
  geom_point(size = 0.7, aes(x = `1`, y = `2`)) +
  scale_shape_discrete(name = "batch", labels = c("lab_reared parents", "wild parents")) +
  scale_color_discrete(name = "treatment") +
  labs(x = paste0("PC1 [", vexpl[1],"%]"), y = paste0("PC2 [", vexpl[2],"%]")) +
  theme_custom() +
  theme(legend.spacing = unit(0, "cm"), legend.key.size = unit(0.7, 'lines')) +
  guides(fill=guide_legend(ncol=2)) 

pcoa

ggsave(filename = "figures/Fig.pcoa_Abdominal_GE.png",pcoa, width = 171, height = 110, units = "mm")
## Calculate distances among samples
sampleDists <- dist(t(assay(vsd_Pp)), method = "manhattan")
sampleDistMatrix <- as.matrix(sampleDists)

## Calculate MDS
mds <- as.data.frame(colData(vsd_Pp)) %>% 
  cbind(cmdscale(sampleDistMatrix)) %>%
  mutate(btch = factor(batch))

# Calculate group centroids for plotting
mds <- mds %>%
  group_by(batch, treatment) %>%
  dplyr::summarise(c1 = mean(`1`), c2 = mean(`2`)) %>%    
  full_join(mds)

# Calculate variance explained by each PC
MDS <- cmdscale(sampleDistMatrix, eig = TRUE)
vexpl <- round(MDS$eig*100/sum(MDS$eig),1)[1:2]

# Plot with spiders
pcoa <- ggplot(mds, aes(color = treatment, shape = btch)) +
  geom_segment(mapping = aes(x = `1`, y = `2`, xend = c1, yend = c2), lwd = 0.25, col = "grey") +
  geom_point(size = 2, aes(x = c1, y = c2), show.legend = FALSE) +
  geom_point(size = 0.7, aes(x = `1`, y = `2`)) +
  scale_shape_discrete(name = "batch", labels = c("lab_reared parents", "wild parents")) +
  scale_color_discrete(name = "treatment") +
  labs(x = paste0("PC1 [", vexpl[1],"%]"), y = paste0("PC2 [", vexpl[2],"%]")) +
  theme_custom() +
  theme(legend.spacing = unit(0, "cm"), legend.key.size = unit(0.7, 'lines')) +
  guides(fill=guide_legend(ncol=2)) 

pcoa

ggsave(filename = "figures/Fig.pcoa_PlePedal_GE.png",pcoa, width = 171, height = 110, units = "mm")
## Calculate distances among samples
sampleDists <- dist(t(assay(vsd_60A)), method = "manhattan")
sampleDistMatrix <- as.matrix(sampleDists)

## Calculate MDS
mds <- as.data.frame(colData(vsd_60A)) %>% 
  cbind(cmdscale(sampleDistMatrix))

# Calculate group centroids for plotting
mds <- mds %>%
  group_by(treatment) %>%
  dplyr::summarise(c1 = mean(`1`), c2 = mean(`2`)) %>%    
  full_join(mds)

# Calculate variance explained by each PC
MDS <- cmdscale(sampleDistMatrix, eig = TRUE)
vexpl <- round(MDS$eig*100/sum(MDS$eig),1)[1:2]

# Plot with spiders
pcoa <- ggplot(mds, aes(color = treatment)) +
  geom_segment(mapping = aes(x = `1`, y = `2`, xend = c1, yend = c2), lwd = 0.25, col = "grey") +
  geom_point(size = 2, aes(x = c1, y = c2), show.legend = FALSE) +
  geom_point(size = 0.7, aes(x = `1`, y = `2`)) +
  scale_color_discrete(name = "treatment") +
  labs(x = paste0("PC1 [", vexpl[1],"%]"), y = paste0("PC2 [", vexpl[2],"%]")) +
  theme_custom() +
  theme(legend.spacing = unit(0, "cm"), legend.key.size = unit(0.7, 'lines')) +
  guides(fill=guide_legend(ncol=2)) 

pcoa

ggsave(filename = "figures/Fig.pcoa_60A_GE.png",pcoa, width = 171, height = 110, units = "mm")
## Calculate distances among samples
sampleDists <- dist(t(assay(vsd_60Pp)), method = "manhattan")
sampleDistMatrix <- as.matrix(sampleDists)

## Calculate MDS
mds <- as.data.frame(colData(vsd_60Pp)) %>% 
  cbind(cmdscale(sampleDistMatrix))

# Calculate group centroids for plotting
mds <- mds %>%
  group_by(treatment) %>%
  dplyr::summarise(c1 = mean(`1`), c2 = mean(`2`)) %>%    
  full_join(mds)

# Calculate variance explained by each PC
MDS <- cmdscale(sampleDistMatrix, eig = TRUE)
vexpl <- round(MDS$eig*100/sum(MDS$eig),1)[1:2]

# Plot with spiders
pcoa <- ggplot(mds, aes(color = treatment)) +
  geom_segment(mapping = aes(x = `1`, y = `2`, xend = c1, yend = c2), lwd = 0.25, col = "grey") +
  geom_point(size = 2, aes(x = c1, y = c2), show.legend = FALSE) +
  geom_point(size = 0.7, aes(x = `1`, y = `2`)) +
  scale_color_discrete(name = "treatment") +
  labs(x = paste0("PC1 [", vexpl[1],"%]"), y = paste0("PC2 [", vexpl[2],"%]")) +
  theme_custom() +
  theme(legend.spacing = unit(0, "cm"), legend.key.size = unit(0.7, 'lines')) +
  guides(fill=guide_legend(ncol=2)) 

pcoa

ggsave(filename = "figures/Fig.pcoa_60Pp_GE.png",pcoa, width = 171, height = 110, units = "mm")
## Calculate distances among samples
sampleDists <- dist(t(assay(vsd_71A)), method = "manhattan")
sampleDistMatrix <- as.matrix(sampleDists)

## Calculate MDS
mds <- as.data.frame(colData(vsd_71A)) %>% 
  cbind(cmdscale(sampleDistMatrix))

# Calculate group centroids for plotting
mds <- mds %>%
  group_by(treatment) %>%
  dplyr::summarise(c1 = mean(`1`), c2 = mean(`2`)) %>%    
  full_join(mds)

# Calculate variance explained by each PC
MDS <- cmdscale(sampleDistMatrix, eig = TRUE)
vexpl <- round(MDS$eig*100/sum(MDS$eig),1)[1:2]

# Plot with spiders
pcoa <- ggplot(mds, aes(color = treatment)) +
  geom_segment(mapping = aes(x = `1`, y = `2`, xend = c1, yend = c2), lwd = 0.25, col = "grey") +
  geom_point(size = 2, aes(x = c1, y = c2), show.legend = FALSE) +
  geom_point(size = 0.7, aes(x = `1`, y = `2`)) +
  scale_color_discrete(name = "treatment") +
  labs(x = paste0("PC1 [", vexpl[1],"%]"), y = paste0("PC2 [", vexpl[2],"%]")) +
  theme_custom() +
  theme(legend.spacing = unit(0, "cm"), legend.key.size = unit(0.7, 'lines')) +
  guides(fill=guide_legend(ncol=2)) 

pcoa

ggsave(filename = "figures/Fig.pcoa_71A_GE.png",pcoa, width = 171, height = 110, units = "mm")
## Calculate distances among samples
sampleDists <- dist(t(assay(vsd_71Pp)), method = "manhattan")
sampleDistMatrix <- as.matrix(sampleDists)

## Calculate MDS
mds <- as.data.frame(colData(vsd_71Pp)) %>% 
  cbind(cmdscale(sampleDistMatrix))

# Calculate group centroids for plotting
mds <- mds %>%
  group_by(treatment) %>%
  dplyr::summarise(c1 = mean(`1`), c2 = mean(`2`)) %>%    
  full_join(mds)

# Calculate variance explained by each PC
MDS <- cmdscale(sampleDistMatrix, eig = TRUE)
vexpl <- round(MDS$eig*100/sum(MDS$eig),1)[1:2]

# Plot with spiders
pcoa <- ggplot(mds, aes(color = treatment)) +
  geom_segment(mapping = aes(x = `1`, y = `2`, xend = c1, yend = c2), lwd = 0.25, col = "grey") +
  geom_point(size = 2, aes(x = c1, y = c2), show.legend = FALSE) +
  geom_point(size = 0.7, aes(x = `1`, y = `2`)) +
  scale_color_discrete(name = "treatment") +
  labs(x = paste0("PC1 [", vexpl[1],"%]"), y = paste0("PC2 [", vexpl[2],"%]")) +
  theme_custom() +
  theme(legend.spacing = unit(0, "cm"), legend.key.size = unit(0.7, 'lines')) +
  guides(fill=guide_legend(ncol=2)) 

pcoa

ggsave(filename = "figures/Fig.pcoa_71Pp_GE.png",pcoa, width = 171, height = 110, units = "mm")

Discriminant Analysis

Discriminant Analysis of Principal Components (DAPC)

set.seed(1234)
# Discriminant function including all genes

dat <- data.frame(assay(vsd_71)) 

# How many PCs should be kept?
dapc2 <- dapc(t(dat), colData(dds_71)$treatment, n.da=13, n.pca=100)
temp <- optim.a.score(dapc2, n.sim = 50)

my.dapc <- function(n.pca) dapc(t(dat), colData(dds_71)$treatment, n.pca = n.pca, n.da = 13)

library(furrr)
plan(multiprocess)
my.dapc.res <- tibble(n.pca = 2:30) %>%
  mutate(dapc = map(n.pca, my.dapc),
         a.score = furrr::future_map(dapc, a.score, n.sim = 1000, seed = TRUE),
          mean = map_dbl(a.score, ~ .$mean),
          cumvar = map_dbl(dapc, ~ .$var))
 
my.dapc.res %>%
   arrange(-mean) %>%
   head()
## # A tibble: 6 × 5
##   n.pca dapc   a.score           mean cumvar
##   <int> <list> <list>           <dbl>  <dbl>
## 1     7 <dapc> <named list [3]> 0.425  0.633
## 2    11 <dapc> <named list [3]> 0.419  0.710
## 3     9 <dapc> <named list [3]> 0.414  0.675
## 4     8 <dapc> <named list [3]> 0.411  0.654
## 5    12 <dapc> <named list [3]> 0.408  0.724
## 6    10 <dapc> <named list [3]> 0.375  0.693
# Retaining 8 PC's gives a high a-score (52%) and utilizes 63% of variance. 

dp1 <- dapc(t(dat), colData(dds_71)$treatment,
            n.pca = 7, n.da = 6)   # Retain 7 PCs and 6 discriminant functions
varexpl <- round((dp1$eig/sum(dp1$eig))[1:2] * 100, 1)

dapc <- tibble(sample = rownames(dp1$ind.coord),
               grp = dp1$grp,
               LD1 = dp1$ind.coord[,1],
               LD2 = dp1$ind.coord[,2])
dapc <- dapc %>%
  group_by(grp) %>%
  summarize(c1 = mean(LD1),
            c2 = mean(LD2)) %>%
  full_join(dapc)

# Plot with spiders
dapc.fig <- 
  ggplot(dapc, aes(shape = factor(grepl("Pp", sample)), 
                             fill = factor(grp, levels = c("Control","Hyp_T2h","Hyp_T6h","LtH_6","LtH_7","Reox","Recovery"), ordered = T))) +
  geom_segment(mapping = aes(x = LD1, y = LD2, xend = c1, yend = c2), lwd = 0.25, col = "grey") +
  geom_point(aes(x = c1, y = c2), size = 2) +
  geom_point(aes(x = LD1, y = LD2), size = 1, show.legend = FALSE) +
  scale_shape_manual(name = "ganglia", labels = c("Abdominal", "Pleural & Pedal"), values = c(21, 24)) +
  scale_fill_discrete(name = "treatment") +
  guides(fill = guide_legend(override.aes = list(shape = 21, size = 2))) +
  guides(shape = guide_legend(override.aes = list(fill = "black", size = 2))) +
  labs(x = paste0("LD1 [", varexpl[1],"%]"), y = paste0("LD2 [", varexpl[2],"%]")) +
  theme_custom() 

dapc.fig

ggsave(filename = "figures/Fig.DAPC_71_GE.png",pcoa, width = 171, height = 110, units = "mm")
set.seed(1234)
# Discriminant function including all genes

dat <- data.frame(assay(vsd_60)) 

# How many PCs should be kept?
dapc2 <- dapc(t(dat), colData(dds_60)$treatment, n.da=13, n.pca=100)
temp <- optim.a.score(dapc2, n.sim = 50)

my.dapc <- function(n.pca) dapc(t(dat), colData(dds_60)$treatment, n.pca = n.pca, n.da = 13)

library(furrr)
plan(multiprocess)
my.dapc.res <- tibble(n.pca = 2:30) %>%
  mutate(dapc = map(n.pca, my.dapc),
         a.score = furrr::future_map(dapc, a.score, n.sim = 1000, seed = TRUE),
          mean = map_dbl(a.score, ~ .$mean),
          cumvar = map_dbl(dapc, ~ .$var))
 
my.dapc.res %>%
   arrange(-mean) %>%
   head()
## # A tibble: 6 × 5
##   n.pca dapc   a.score           mean cumvar
##   <int> <list> <list>           <dbl>  <dbl>
## 1     7 <dapc> <named list [3]> 0.522  0.605
## 2     5 <dapc> <named list [3]> 0.518  0.556
## 3     8 <dapc> <named list [3]> 0.514  0.626
## 4     9 <dapc> <named list [3]> 0.501  0.645
## 5     6 <dapc> <named list [3]> 0.477  0.583
## 6    10 <dapc> <named list [3]> 0.475  0.663
# Retaining 8 PC's gives a high a-score (52%) and utilizes 63% of variance. 

dp1 <- dapc(t(dat), colData(dds_60)$treatment,
            n.pca = 7, n.da = 6)   # Retain 7 PCs and 6 discriminant functions
varexpl <- round((dp1$eig/sum(dp1$eig))[1:2] * 100, 1)

dapc <- tibble(sample = rownames(dp1$ind.coord),
               grp = dp1$grp,
               LD1 = dp1$ind.coord[,1],
               LD2 = dp1$ind.coord[,2])
dapc <- dapc %>%
  group_by(grp) %>%
  summarize(c1 = mean(LD1),
            c2 = mean(LD2)) %>%
  full_join(dapc)

# Plot with spiders
dapc.fig <- 
  ggplot(dapc, aes(shape = factor(grepl("Pp", sample)), 
                             fill = factor(grp, levels = c("Control","Hyp_T2h","Hyp_T6h","LtH_6","LtH_7","Reox","Recovery"), ordered = T))) +
  geom_segment(mapping = aes(x = LD1, y = LD2, xend = c1, yend = c2), lwd = 0.25, col = "grey") +
  geom_point(aes(x = c1, y = c2), size = 2) +
  geom_point(aes(x = LD1, y = LD2), size = 1, show.legend = FALSE) +
  scale_shape_manual(name = "ganglia", labels = c("Abdominal", "Pleural & Pedal"), values = c(21, 24)) +
  scale_fill_discrete(name = "treatment") +
  guides(fill = guide_legend(override.aes = list(shape = 21, size = 2))) +
  guides(shape = guide_legend(override.aes = list(fill = "black", size = 2))) +
  labs(x = paste0("LD1 [", varexpl[1],"%]"), y = paste0("LD2 [", varexpl[2],"%]")) +
  theme_custom() 

dapc.fig

ggsave(filename = "figures/Fig.DAPC_60_GE.png",pcoa, width = 171, height = 110, units = "mm")

The DAPC shows that despite the significant variability among ganglia, the treatment groups can be discriminated well based on gene expression data and show a consistent response. This is true for both families (60 and 71).

Differential expression analysis

Since families and tissues appear to respond (at least on PCoA) differently to hypoxia, we should analyze differential expression in each family and tissue separately to assess their unique responses. We can also analyze differential expression across all tissue and across families together to see what responses are common.

Run DESeq

Starting with Batch 60 (in-house parents) First, within each batch and across tissue

# Run DESeq pipeline
design(dds_60) <- formula(~ tissue.group)
dsr1 <- DESeq(dds_60)

# Define group contrasts
group.contrasts <- tibble(num = c("Hyp_T2h","Hyp_T6h","LtH_6","LtH_7","Reox","Recovery",
                                  "Hyp_T6h","Reox","Recovery","Reox","LtH_6","LtH_7"),
                          den = c("Control","Control","Control","Control","Control","Control",
                                  "Hyp_T2h","Hyp_T6h","Hyp_T6h","Recovery","Hyp_T6h","Hyp_T6h"))

# Get DESeq results for all group contrasts for each colony
DE <- crossing(tissue = c("A", "Pp"), group.contrasts) %>%
  mutate(dsr = pmap(list(tissue, num, den), function(x, y, z) {
    results(dsr1, contrast = c("tissue.group", paste0(x, ".", c(y, z))))}))

Then, for both tissues together

# Run DESeq pipeline 
design(dds_60) <- formula(~ tissue + treatment)
dsr2 <- DESeq(dds_60)

# Get DESeq results for all contrasts and join with results from each colony, from above
DE <- crossing(tissue = "all", group.contrasts) %>%
  mutate(dsr = map2(num, den, ~ results(dsr2, contrast = c("treatment", .x, .y)))) %>%
  bind_rows(DE)

Get significant DEGs

DE <- DE %>%
  mutate(sig = map(dsr, ~ rownames_to_column(data.frame(.[which(.$padj < 0.1), ]), "gene")),
          up = map(sig, ~ filter(., log2FoldChange > 0)),
        down = map(sig, ~ filter(., log2FoldChange < 0))) 

# Generate logP values for all differential expression contrasts
DE <- DE %>% mutate(logP = map(dsr, ~ data.frame(
  gene = rownames(data.frame(.)),
  logP = -log10(data.frame(.)$pvalue) * sign(data.frame(.)$log2FoldChange))))

# Count number of differentially expressed genes within each colony, and overall, for each contrast
DEtab <- DE %>%
  filter((num == "Hyp_T2h" & den == "Control")|(num == "Hyp_T6h" & den == "Control")|
         (num == "LtH_6" & den == "Control")|(num == "LtH_7" & den == "Control")|
         (num == "Reox" & den == "Control")|(num == "Recovery" & den == "Control")|
         (num == "Hyp_T6h" & den == "Hyp_T2h")|(num == "Reox" & den == "Hyp_T6h")|
         (num == "Recovery" & den == "Hyp_T6h")|(num == "Reox" & den == "Recovery")|
         (num == "LtH_6" & den == "Hyp_T6h")|(num == "LtH_7" & den == "Hyp_T6h")) %>%
  mutate(nsig = map_dbl( sig, ~ nrow(.)),
          nup = map_dbl(  up, ~ nrow(.)),
          ndn = map_dbl(down, ~ nrow(.)),
          `DEGs [up, down]` = paste0(nsig, " [", nup, ", ", ndn, "]")) %>%
  mutate(ganglia = case_when(tissue == "A" ~ "abdominal", tissue == "Pp" ~ "pleural & pedal", 
                           tissue == "all" ~ "all ganglia")) %>%
  mutate(ganglia = factor(ganglia, levels = c("abdominal", "pleural & pedal", "all ganglia"))) %>%
  select(ganglia, num, den, `DEGs [up, down]`) %>%
  spread(ganglia, `DEGs [up, down]`)

DEtab$contrast <- paste(DEtab$num, DEtab$den, sep = " vs ")
dek <- DEtab %>% select(c(6,3:5)) %>%
  knitr::kable(format = "markdown", caption = "Number of differentially expressed genes within individual genets and across all genets for each specified contrast. DEGs identified using DESeq with an adjusted p-value < 0.1. In brackets are the numbers of significantly up- and down-regulated genes.", row.names = )

dek
Number of differentially expressed genes within individual genets and across all genets for each specified contrast. DEGs identified using DESeq with an adjusted p-value < 0.1. In brackets are the numbers of significantly up- and down-regulated genes.
contrast abdominal pleural & pedal all ganglia
Hyp_T2h vs Control 6232 [2124, 4108] 1295 [479, 816] 5748 [1853, 3895]
Hyp_T6h vs Control 6041 [2085, 3956] 868 [320, 548] 4891 [1661, 3230]
Hyp_T6h vs Hyp_T2h 12 [10, 2] 50 [35, 15] 175 [103, 72]
LtH_6 vs Control 4484 [1399, 3085] 1415 [534, 881] 3527 [1087, 2440]
LtH_6 vs Hyp_T6h 217 [81, 136] 1817 [939, 878] 1940 [897, 1043]
LtH_7 vs Control 3514 [1147, 2367] 178 [64, 114] 1750 [445, 1305]
LtH_7 vs Hyp_T6h 183 [68, 115] 1028 [530, 498] 1231 [603, 628]
Recovery vs Control 4630 [1333, 3297] 337 [96, 241] 3398 [901, 2497]
Recovery vs Hyp_T6h 367 [86, 281] 360 [155, 205] 1124 [437, 687]
Reox vs Control 4727 [1495, 3232] 597 [257, 340] 4227 [1309, 2918]
Reox vs Hyp_T6h 0 [0, 0] 3 [0, 3] 26 [12, 14]
Reox vs Recovery 126 [80, 46] 200 [121, 79] 585 [350, 235]

Then Batch 71 (wild parents)

# Run DESeq pipeline
design(dds_71) <- formula(~ tissue.group)
dsr1 <- DESeq(dds_71)

# Define group contrasts
group.contrasts <- tibble(num = c("Hyp_T2h","Hyp_T6h","LtH_6","LtH_7","Reox","Recovery",
                                  "Hyp_T6h","Reox","Recovery","Reox","LtH_6","LtH_7"),
                          den = c("Control","Control","Control","Control","Control","Control",
                                  "Hyp_T2h","Hyp_T6h","Hyp_T6h","Recovery","Hyp_T6h","Hyp_T6h"))

# Get DESeq results for all group contrasts for each colony
DE_71 <- crossing(tissue = c("A", "Pp"), group.contrasts) %>%
  mutate(dsr = pmap(list(tissue, num, den), function(x, y, z) {
    results(dsr1, contrast = c("tissue.group", paste0(x, ".", c(y, z))))}))

Then, for both tissues together

# Run DESeq pipeline 
design(dds_71) <- formula(~ tissue + treatment)
dsr2 <- DESeq(dds_71)

# Get DESeq results for all contrasts and join with results from each colony, from above
DE_71 <- crossing(tissue = "all", group.contrasts) %>%
  mutate(dsr = map2(num, den, ~ results(dsr2, contrast = c("treatment", .x, .y)))) %>%
  bind_rows(DE_71)

Get significant DEGs

DE_71 <- DE_71 %>%
  mutate(sig = map(dsr, ~ rownames_to_column(data.frame(.[which(.$padj < 0.1), ]), "gene")),
          up = map(sig, ~ filter(., log2FoldChange > 0)),
        down = map(sig, ~ filter(., log2FoldChange < 0))) 

# Generate logP values for all differential expression contrasts
DE_71 <- DE_71 %>% mutate(logP = map(dsr, ~ data.frame(
  gene = rownames(data.frame(.)),
  logP = -log10(data.frame(.)$pvalue) * sign(data.frame(.)$log2FoldChange))))

# Count number of differentially expressed genes within each colony, and overall, for each contrast
DEtab_71 <- DE_71 %>%
  filter((num == "Hyp_T2h" & den == "Control")|(num == "Hyp_T6h" & den == "Control")|
         (num == "LtH_6" & den == "Control")|(num == "LtH_7" & den == "Control")|
         (num == "Reox" & den == "Control")|(num == "Recovery" & den == "Control")|
         (num == "Hyp_T6h" & den == "Hyp_T2h")|(num == "Reox" & den == "Hyp_T6h")|
         (num == "Recovery" & den == "Hyp_T6h")|(num == "Reox" & den == "Recovery")|
         (num == "LtH_6" & den == "Hyp_T6h")|(num == "LtH_7" & den == "Hyp_T6h")) %>%
  mutate(nsig = map_dbl( sig, ~ nrow(.)),
          nup = map_dbl(  up, ~ nrow(.)),
          ndn = map_dbl(down, ~ nrow(.)),
          `DEGs [up, down]` = paste0(nsig, " [", nup, ", ", ndn, "]")) %>%
  mutate(ganglia = case_when(tissue == "A" ~ "abdominal", tissue == "Pp" ~ "pleural & pedal", 
                           tissue == "all" ~ "all ganglia")) %>%
  mutate(ganglia = factor(ganglia, levels = c("abdominal", "pleural & pedal", "all ganglia"))) %>%
  select(ganglia, num, den, `DEGs [up, down]`) %>%
  spread(ganglia, `DEGs [up, down]`)

DEtab_71$contrast <- paste(DEtab_71$num, DEtab$den, sep = " vs ")
dek <- DEtab_71 %>% select(c(6,3:5)) %>%
  knitr::kable(format = "markdown", caption = "Number of differentially expressed genes within individual genets and across all genets for each specified contrast. DEGs identified using DESeq with an adjusted p-value < 0.1. In brackets are the numbers of significantly up- and down-regulated genes.", row.names = )

dek
Number of differentially expressed genes within individual genets and across all genets for each specified contrast. DEGs identified using DESeq with an adjusted p-value < 0.1. In brackets are the numbers of significantly up- and down-regulated genes.
contrast abdominal pleural & pedal all ganglia
Hyp_T2h vs Control 4103 [1063, 3040] 197 [75, 122] 3295 [978, 2317]
Hyp_T6h vs Control 4342 [1254, 3088] 143 [86, 57] 3238 [1014, 2224]
Hyp_T6h vs Hyp_T2h 194 [175, 19] 3 [2, 1] 204 [167, 37]
LtH_6 vs Control 4521 [1222, 3299] 0 [0, 0] 1641 [375, 1266]
LtH_6 vs Hyp_T6h 244 [56, 188] 81 [28, 53] 319 [114, 205]
LtH_7 vs Control 6019 [2217, 3802] 67 [33, 34] 3440 [1282, 2158]
LtH_7 vs Hyp_T6h 973 [655, 318] 569 [225, 344] 1259 [748, 511]
Recovery vs Control 3291 [593, 2698] 36 [24, 12] 2505 [578, 1927]
Recovery vs Hyp_T6h 296 [86, 210] 169 [106, 63] 738 [335, 403]
Reox vs Control 4092 [1157, 2935] 581 [349, 232] 3537 [1249, 2288]
Reox vs Hyp_T6h 84 [5, 79] 1 [1, 0] 65 [15, 50]
Reox vs Recovery 145 [82, 63] 199 [102, 97] 667 [358, 309]

Now within each tissue and across batches

# Run DESeq pipeline
design(dds_A) <- formula(~ batch.group)
dsr1 <- DESeq(dds_A)

# Define group contrasts
group.contrasts <- tibble(num = c("Hyp_T2h","Hyp_T6h","LtH_6","LtH_7","Reox","Recovery",
                                  "Hyp_T6h","Reox","Recovery","Reox","LtH_6","LtH_7"),
                          den = c("Control","Control","Control","Control","Control","Control",
                                  "Hyp_T2h","Hyp_T6h","Hyp_T6h","Recovery","Hyp_T6h","Hyp_T6h"))

# Get DESeq results for all group contrasts for each colony
DE_A <- crossing(batch = c("60", "71"), group.contrasts) %>%
  mutate(dsr = pmap(list(batch, num, den), function(x, y, z) {
    results(dsr1, contrast = c("batch.group", paste0(x, ".", c(y, z))))}))

Then, for both tissues together

# Run DESeq pipeline 
design(dds_A) <- formula(~ batch + treatment)
dsr2 <- DESeq(dds_A)

# Get DESeq results for all contrasts and join with results from each colony, from above
DE_A <- crossing(batch = "all", group.contrasts) %>%
  mutate(dsr = map2(num, den, ~ results(dsr2, contrast = c("treatment", .x, .y)))) %>%
  bind_rows(DE_A)

Get significant DEGs

DE_A <- DE_A %>%
  mutate(sig = map(dsr, ~ rownames_to_column(data.frame(.[which(.$padj < 0.1), ]), "gene")),
          up = map(sig, ~ filter(., log2FoldChange > 0)),
        down = map(sig, ~ filter(., log2FoldChange < 0))) 

# Generate logP values for all differential expression contrasts
DE_A <- DE_A %>% mutate(logP = map(dsr, ~ data.frame(
  gene = rownames(data.frame(.)),
  logP = -log10(data.frame(.)$pvalue) * sign(data.frame(.)$log2FoldChange))))

# Count number of differentially expressed genes within each colony, and overall, for each contrast
DE_Atab <- DE_A %>%
  filter((num == "Hyp_T2h" & den == "Control")|(num == "Hyp_T6h" & den == "Control")|
         (num == "LtH_6" & den == "Control")|(num == "LtH_7" & den == "Control")|
         (num == "Reox" & den == "Control")|(num == "Recovery" & den == "Control")|
         (num == "Hyp_T6h" & den == "Hyp_T2h")|(num == "Reox" & den == "Hyp_T6h")|
         (num == "Recovery" & den == "Hyp_T6h")|(num == "Reox" & den == "Recovery")|
         (num == "LtH_6" & den == "Hyp_T6h")|(num == "LtH_7" & den == "Hyp_T6h")) %>%
  mutate(nsig = map_dbl( sig, ~ nrow(.)),
          nup = map_dbl(  up, ~ nrow(.)),
          ndn = map_dbl(down, ~ nrow(.)),
          `DEGs [up, down]` = paste0(nsig, " [", nup, ", ", ndn, "]")) %>%
  mutate(family = case_when(batch == "60" ~ "lab parents", batch == "71" ~ "wild parents", 
                           batch == "all" ~ "all batches")) %>%
  mutate(family = factor(family, levels = c("lab parents", "wild parents", "all batches"))) %>%
  select(family, num, den, `DEGs [up, down]`) %>%
  spread(family, `DEGs [up, down]`)

DE_Atab$contrast <- paste(DE_Atab$num, DE_Atab$den, sep = " vs ")
dek <- DE_Atab %>% select(c(6,3:5)) %>%
  knitr::kable(format = "markdown", caption = "Number of differentially expressed genes within individual genets and across all genets for each specified contrast. DEGs identified using DESeq with an adjusted p-value < 0.1. In brackets are the numbers of significantly up- and down-regulated genes.", row.names = )

dek
Number of differentially expressed genes within individual genets and across all genets for each specified contrast. DEGs identified using DESeq with an adjusted p-value < 0.1. In brackets are the numbers of significantly up- and down-regulated genes.
contrast lab parents wild parents all batches
Hyp_T2h vs Control 6090 [1796, 4294] 4314 [1221, 3093] 8048 [2465, 5583]
Hyp_T6h vs Control 5882 [1743, 4139] 4581 [1502, 3079] 7889 [2570, 5319]
Hyp_T6h vs Hyp_T2h 9 [8, 1] 174 [154, 20] 114 [97, 17]
LtH_6 vs Control 4383 [1187, 3196] 4847 [1573, 3274] 7409 [2314, 5095]
LtH_6 vs Hyp_T6h 143 [61, 82] 265 [73, 192] 279 [116, 163]
LtH_7 vs Control 3293 [851, 2442] 6212 [2428, 3784] 7918 [2830, 5088]
LtH_7 vs Hyp_T6h 111 [47, 64] 998 [642, 356] 1048 [664, 384]
Recovery vs Control 4523 [1013, 3510] 3601 [867, 2734] 6737 [1873, 4864]
Recovery vs Hyp_T6h 290 [59, 231] 278 [71, 207] 604 [192, 412]
Reox vs Control 4545 [1118, 3427] 4330 [1354, 2976] 7225 [2159, 5066]
Reox vs Hyp_T6h 0 [0, 0] 63 [3, 60] 4 [0, 4]
Reox vs Recovery 106 [66, 40] 146 [97, 49] 320 [204, 116]
# Run DESeq pipeline
design(dds_Pp) <- formula(~ batch.group)
dsr1 <- DESeq(dds_Pp)

# Define group contrasts
group.contrasts <- tibble(num = c("Hyp_T2h","Hyp_T6h","LtH_6","LtH_7","Reox","Recovery",
                                  "Hyp_T6h","Reox","Recovery","Reox","LtH_6","LtH_7"),
                          den = c("Control","Control","Control","Control","Control","Control",
                                  "Hyp_T2h","Hyp_T6h","Hyp_T6h","Recovery","Hyp_T6h","Hyp_T6h"))

# Get DESeq results for all group contrasts for each colony
DE_Pp <- crossing(batch = c("60", "71"), group.contrasts) %>%
  mutate(dsr = pmap(list(batch, num, den), function(x, y, z) {
    results(dsr1, contrast = c("batch.group", paste0(x, ".", c(y, z))))}))

Then, for both tissues together

# Run DESeq pipeline 
design(dds_Pp) <- formula(~ batch + treatment)
dsr2 <- DESeq(dds_Pp)

# Get DESeq results for all contrasts and join with results from each colony, from above
DE_Pp <- crossing(batch = "all", group.contrasts) %>%
  mutate(dsr = map2(num, den, ~ results(dsr2, contrast = c("treatment", .x, .y)))) %>%
  bind_rows(DE_Pp)

Get significant DEGs

DE_Pp <- DE_Pp %>%
  mutate(sig = map(dsr, ~ rownames_to_column(data.frame(.[which(.$padj < 0.1), ]), "gene")),
          up = map(sig, ~ filter(., log2FoldChange > 0)),
        down = map(sig, ~ filter(., log2FoldChange < 0))) 

# Generate logP values for all differential expression contrasts
DE_Pp <- DE_Pp %>% mutate(logP = map(dsr, ~ data.frame(
  gene = rownames(data.frame(.)),
  logP = -log10(data.frame(.)$pvalue) * sign(data.frame(.)$log2FoldChange))))

# Count number of differentially expressed genes within each colony, and overall, for each contrast
DE_Pptab <- DE_Pp %>%
  filter((num == "Hyp_T2h" & den == "Control")|(num == "Hyp_T6h" & den == "Control")|
         (num == "LtH_6" & den == "Control")|(num == "LtH_7" & den == "Control")|
         (num == "Reox" & den == "Control")|(num == "Recovery" & den == "Control")|
         (num == "Hyp_T6h" & den == "Hyp_T2h")|(num == "Reox" & den == "Hyp_T6h")|
         (num == "Recovery" & den == "Hyp_T6h")|(num == "Reox" & den == "Recovery")|
         (num == "LtH_6" & den == "Hyp_T6h")|(num == "LtH_7" & den == "Hyp_T6h")) %>%
  mutate(nsig = map_dbl( sig, ~ nrow(.)),
          nup = map_dbl(  up, ~ nrow(.)),
          ndn = map_dbl(down, ~ nrow(.)),
          `DEGs [up, down]` = paste0(nsig, " [", nup, ", ", ndn, "]")) %>%
  mutate(family = case_when(batch == "60" ~ "lab parents", batch == "71" ~ "wild parents", 
                           batch == "all" ~ "all batches")) %>%
  mutate(family = factor(family, levels = c("lab parents", "wild parents", "all batches"))) %>%
  select(family, num, den, `DEGs [up, down]`) %>%
  spread(family, `DEGs [up, down]`)

DE_Pptab$contrast <- paste(DE_Pptab$num, DE_Pptab$den, sep = " vs ")
dek <- DE_Pptab %>% select(c(6,3:5)) %>%
  knitr::kable(format = "markdown", caption = "Number of differentially expressed genes within individual genets and across all genets for each specified contrast. DEGs identified using DESeq with an adjusted p-value < 0.1. In brackets are the numbers of significantly up- and down-regulated genes.", row.names = )

dek
Number of differentially expressed genes within individual genets and across all genets for each specified contrast. DEGs identified using DESeq with an adjusted p-value < 0.1. In brackets are the numbers of significantly up- and down-regulated genes.
contrast lab parents wild parents all batches
Hyp_T2h vs Control 1266 [406, 860] 270 [117, 153] 1557 [599, 958]
Hyp_T6h vs Control 817 [266, 551] 165 [97, 68] 973 [431, 542]
Hyp_T6h vs Hyp_T2h 38 [23, 15] 5 [3, 2] 73 [57, 16]
LtH_6 vs Control 1270 [463, 807] 0 [0, 0] 1333 [613, 720]
LtH_6 vs Hyp_T6h 1656 [908, 748] 118 [37, 81] 2200 [1100, 1100]
LtH_7 vs Control 200 [75, 125] 191 [97, 94] 599 [250, 349]
LtH_7 vs Hyp_T6h 895 [508, 387] 757 [267, 490] 2234 [1117, 1117]
Recovery vs Control 330 [76, 254] 51 [38, 13] 451 [216, 235]
Recovery vs Hyp_T6h 279 [133, 146] 213 [132, 81] 647 [338, 309]
Reox vs Control 552 [203, 349] 600 [342, 258] 1378 [664, 714]
Reox vs Hyp_T6h 1 [0, 1] 1 [1, 0] 14 [0, 14]
Reox vs Recovery 165 [103, 62] 204 [69, 135] 462 [235, 227]

DEG Similarities

Similarity of differentially expressed genes across tissue and contrasts

DEGs in common across tissues and batches
# Find genes that are DE in same direction in both tissues when analyzed individually
DE %>%
  unnest(sig) %>%
  filter(tissue != "all") %>%
  group_by(num, den) %>%
  dplyr::count(gene, log2FoldChange > 0) %>%   # In how many genets is same gene DE in same direction?
  summarise(`shared` = sum(n==2)) %>%
  knitr::kable(caption = "Number of differentially expressed genes in common between abdominal and Pleural/Pedal ganglia (Batch 60)")  %>%
  kable_styling(full_width = TRUE)
Number of differentially expressed genes in common between abdominal and Pleural/Pedal ganglia (Batch 60)
num den shared
Hyp_T2h Control 636
Hyp_T6h Control 399
Hyp_T6h Hyp_T2h 7
LtH_6 Control 287
LtH_6 Hyp_T6h 122
LtH_7 Control 32
LtH_7 Hyp_T6h 80
Recovery Control 137
Recovery Hyp_T6h 164
Reox Control 330
Reox Hyp_T6h 0
Reox Recovery 65
DE_71 %>%
  unnest(sig) %>%
  filter(tissue != "all") %>%
  group_by(num, den) %>%
  dplyr::count(gene, log2FoldChange > 0) %>%   # In how many genets is same gene DE in same direction?
  summarise(`shared` = sum(n==2)) %>%
  knitr::kable(caption = "Number of differentially expressed genes in common between abdominal and Pleural/Pedal ganglia (Batch 71)")  %>%
  kable_styling(full_width = TRUE)
Number of differentially expressed genes in common between abdominal and Pleural/Pedal ganglia (Batch 71)
num den shared
Hyp_T2h Control 112
Hyp_T6h Control 88
Hyp_T6h Hyp_T2h 2
LtH_6 Control 0
LtH_6 Hyp_T6h 17
LtH_7 Control 21
LtH_7 Hyp_T6h 76
Recovery Control 23
Recovery Hyp_T6h 73
Reox Control 307
Reox Hyp_T6h 0
Reox Recovery 76
DE_A %>%
  unnest(sig) %>%
  filter(batch != "all") %>%
  group_by(num, den) %>%
  dplyr::count(gene, log2FoldChange > 0) %>%   # In how many genets is same gene DE in same direction?
  summarise(`shared` = sum(n==2)) %>%
  knitr::kable(caption = "Number of differentially expressed genes in common between batches 60 and 71 (Abdominal ganglium)")  %>%
  kable_styling(full_width = TRUE)
Number of differentially expressed genes in common between batches 60 and 71 (Abdominal ganglium)
num den shared
Hyp_T2h Control 3260
Hyp_T6h Control 3131
Hyp_T6h Hyp_T2h 2
LtH_6 Control 2638
LtH_6 Hyp_T6h 19
LtH_7 Control 2182
LtH_7 Hyp_T6h 17
Recovery Control 2365
Recovery Hyp_T6h 54
Reox Control 2660
Reox Hyp_T6h 0
Reox Recovery 28
DE_Pp %>%
  unnest(sig) %>%
  filter(batch != "all") %>%
  group_by(num, den) %>%
  dplyr::count(gene, log2FoldChange > 0) %>%   # In how many genets is same gene DE in same direction?
  summarise(`shared` = sum(n==2)) %>%
  knitr::kable(caption = "Number of differentially expressed genes in common between batches 60 and 71 (Pleural/Pedal)")  %>%
  kable_styling(full_width = TRUE)
Number of differentially expressed genes in common between batches 60 and 71 (Pleural/Pedal)
num den shared
Hyp_T2h Control 111
Hyp_T6h Control 53
Hyp_T6h Hyp_T2h 4
LtH_6 Control 0
LtH_6 Hyp_T6h 76
LtH_7 Control 42
LtH_7 Hyp_T6h 281
Recovery Control 7
Recovery Hyp_T6h 54
Reox Control 98
Reox Hyp_T6h 0
Reox Recovery 34
DEGs_60 <- DE %>%
  unnest(sig) %>%
  filter(tissue == "all") %>%
  mutate(contrast = paste(num, den, sep = ".")) %>%
  select(.,contrast, gene)
write_tsv(DEGs_60, path = "output/DEGs_by_contrast_batch 60_all_ganglia.tsv")

DEGs_60A <- DE %>%
  unnest(sig) %>%
  filter(tissue == "A") %>%
  mutate(contrast = paste(num, den, sep = ".")) %>%
  select(.,contrast, gene)
write_tsv(DEGs_60A, path = "output/DEGs_by_contrast_batch 60_abdominal.tsv")

DEGs_60Pp <- DE %>%
  unnest(sig) %>%
  filter(tissue == "Pp") %>%
  mutate(contrast = paste(num, den, sep = ".")) %>%
  select(.,contrast, gene)
write_tsv(DEGs_60Pp, path = "output/DEGs_by_contrast_batch 60_PlePed.tsv")

DEGs_71 <- DE %>%
  unnest(sig) %>%
  filter(tissue == "all") %>%
  mutate(contrast = paste(num, den, sep = ".")) %>%
  select(.,contrast, gene)
write_tsv(DEGs_71, path = "output/DEGs_by_contrast_batch 71_all_ganglia.tsv")

DEGs_71A <- DE %>%
  unnest(sig) %>%
  filter(tissue == "A") %>%
  mutate(contrast = paste(num, den, sep = ".")) %>%
  select(.,contrast, gene)
write_tsv(DEGs_71A, path = "output/DEGs_by_contrast_batch 71_abdominal.tsv")

DEGs_71Pp <- DE %>%
  unnest(sig) %>%
  filter(tissue == "Pp") %>%
  mutate(contrast = paste(num, den, sep = ".")) %>%
  select(.,contrast, gene)
write_tsv(DEGs_71Pp, path = "output/DEGs_by_contrast_batch 71_PlePed.tsv")

DEGs in common beteween peak of exposure vs Ctrl contrasts (T6h vs Ctrol, LtH_6 vs Ctrl & LtH_7 vs Ctrl)

in batch 60 abdominal

genesincommon_60A <- DE %>%
  unite("contrast", num, den, sep = ".") %>%
  filter(tissue == "A", contrast %in% c("Hyp_T6h.Control", "LtH_6.Control", "LtH_7.Control")) %>%
  unnest(sig) %>%
  select(contrast, gene, log2FoldChange) %>%
  pivot_wider(names_from = contrast, values_from = log2FoldChange) %>%
  drop_na() %>%
  mutate(gene = factor(gene, levels = gene))
genesincommon_60A <- genesincommon_60A[c(1:100),]  
# Get normalized counts for the genes in common
gcounts <- data.frame(assay(vsd_60[which(rownames(assay(vsd_60)) %in% genesincommon_60A$gene), ]))
gcounts <- rownames_to_column(gcounts, var = "gene") %>%
  pivot_longer(-gene, names_to = "sample_ID", values_to = "count") %>%
  left_join(select(sdat0, sample_ID, treatment)) %>%
  mutate(gene = factor(gene, levels = levels(genesincommon_60A$gene))) %>%
  filter(treatment != "Control") %>%
  filter(treatment != "Hyp_T2h") %>%
  filter(treatment != "Reox") %>%
  filter(treatment != "Recovery")


# Get group log2foldchanges for plotting
gl2fc <- genesincommon_60A %>%
  pivot_longer(-gene, names_to = "treatment", values_to = "l2fc") %>%
  mutate(treatment = gsub(".Control", "", treatment))
  
# Get group means for plotting
gcountsmean <- gcounts %>%
  group_by(gene, treatment) %>%
  summarise(count = mean(count))

# Plot
plotgcounts <- gcounts %>%
  ggplot(aes(x = treatment, y = pmax(0,log(count)))) +
  geom_jitter(width = 0.2, aes(color = treatment)) +
  geom_line(data = gcountsmean, aes(group = gene)) +
  geom_text(data = gl2fc, aes(y = 2.4, label = round(l2fc, 2))) +
  facet_wrap(~ gene) +
  theme_custom() +
  theme(legend.position = "none") +
  labs(x = "", y = "log(Variance stabilized counts)") +
  scale_y_continuous(limits = c(0, 2.6))
plotgcounts

ggsave(file = "figures/FigS_shared_genes_60A.png", width = 360, height = 360, units = "mm")

in batch 60 Pleural/Pedal

genesincommon_60Pp <- DE %>%
  unite("contrast", num, den, sep = ".") %>%
  filter(tissue == "Pp", contrast %in% c("Hyp_T6h.Control", "LtH_6.Control", "LtH_7.Control")) %>%
  unnest(sig) %>%
  select(contrast, gene, log2FoldChange) %>%
  pivot_wider(names_from = contrast, values_from = log2FoldChange) %>%
  drop_na() %>%
  mutate(gene = factor(gene, levels = gene))  
# Get normalized counts for the genes in common
gcounts <- data.frame(assay(vsd_60[which(rownames(assay(vsd_60)) %in% genesincommon_60Pp$gene), ]))
gcounts <- rownames_to_column(gcounts, var = "gene") %>%
  pivot_longer(-gene, names_to = "sample_ID", values_to = "count") %>%
  left_join(select(sdat0, sample_ID, treatment)) %>%
  mutate(gene = factor(gene, levels = levels(genesincommon_60Pp$gene))) %>%
  filter(treatment != "Control") %>%
  filter(treatment != "Hyp_T2h") %>%
  filter(treatment != "Reox") %>%
  filter(treatment != "Recovery")


# Get group log2foldchanges for plotting
gl2fc <- genesincommon_60Pp %>%
  pivot_longer(-gene, names_to = "treatment", values_to = "l2fc") %>%
  mutate(treatment = gsub(".Control", "", treatment))
  
# Get group means for plotting
gcountsmean <- gcounts %>%
  group_by(gene, treatment) %>%
  summarise(count = mean(count))

# Plot
plotgcounts <- gcounts %>%
  ggplot(aes(x = treatment, y = pmax(0,log(count)))) +
  geom_jitter(width = 0.2, aes(color = treatment)) +
  geom_line(data = gcountsmean, aes(group = gene)) +
  geom_text(data = gl2fc, aes(y = 2.4, label = round(l2fc, 2))) +
  facet_wrap(~ gene) +
  theme_custom() +
  theme(legend.position = "none") +
  labs(x = "", y = "log(Variance stabilized counts)") +
  scale_y_continuous(limits = c(0, 2.6))
plotgcounts

ggsave(file = "figures/FigS_shared_genes_60Pp.png", width = 360, height = 360, units = "mm")

in batch 71 abdominal

genesincommon_71A <- DE %>%
  unite("contrast", num, den, sep = ".") %>%
  filter(tissue == "A", contrast %in% c("Hyp_T6h.Control", "LtH_6.Control", "LtH_7.Control")) %>%
  unnest(sig) %>%
  select(contrast, gene, log2FoldChange) %>%
  pivot_wider(names_from = contrast, values_from = log2FoldChange) %>%
  drop_na() %>%
  mutate(gene = factor(gene, levels = gene))
genesincommon_71A <- genesincommon_71A[c(1:100),]  
# Get normalized counts for the genes in common
gcounts <- data.frame(assay(vsd_71[which(rownames(assay(vsd_71)) %in% genesincommon_71A$gene), ]))
gcounts <- rownames_to_column(gcounts, var = "gene") %>%
  pivot_longer(-gene, names_to = "sample_ID", values_to = "count") %>%
  left_join(select(sdat0, sample_ID, treatment)) %>%
  mutate(gene = factor(gene, levels = levels(genesincommon_71A$gene))) %>%
  filter(treatment != "Control") %>%
  filter(treatment != "Hyp_T2h") %>%
  filter(treatment != "Reox") %>%
  filter(treatment != "Recovery")


# Get group log2foldchanges for plotting
gl2fc <- genesincommon_71A %>%
  pivot_longer(-gene, names_to = "treatment", values_to = "l2fc") %>%
  mutate(treatment = gsub(".Control", "", treatment))
  
# Get group means for plotting
gcountsmean <- gcounts %>%
  group_by(gene, treatment) %>%
  summarise(count = mean(count))

# Plot
plotgcounts <- gcounts %>%
  ggplot(aes(x = treatment, y = pmax(0,log(count)))) +
  geom_jitter(width = 0.2, aes(color = treatment)) +
  geom_line(data = gcountsmean, aes(group = gene)) +
  geom_text(data = gl2fc, aes(y = 2.4, label = round(l2fc, 2))) +
  facet_wrap(~ gene) +
  theme_custom() +
  theme(legend.position = "none") +
  labs(x = "", y = "log(Variance stabilized counts)") +
  scale_y_continuous(limits = c(0, 2.6))
plotgcounts

ggsave(file = "figures/FigS_shared_genes_71A.png", width = 360, height = 360, units = "mm")

in batch 71 Pleural/Pedal

genesincommon_71Pp <- DE %>%
  unite("contrast", num, den, sep = ".") %>%
  filter(tissue == "Pp", contrast %in% c("Hyp_T6h.Control", "LtH_6.Control", "LtH_7.Control")) %>%
  unnest(sig) %>%
  select(contrast, gene, log2FoldChange) %>%
  pivot_wider(names_from = contrast, values_from = log2FoldChange) %>%
  drop_na() %>%
  mutate(gene = factor(gene, levels = gene))  
# Get normalized counts for the genes in common
gcounts <- data.frame(assay(vsd_71[which(rownames(assay(vsd_71)) %in% genesincommon_71Pp$gene), ]))
gcounts <- rownames_to_column(gcounts, var = "gene") %>%
  pivot_longer(-gene, names_to = "sample_ID", values_to = "count") %>%
  left_join(select(sdat0, sample_ID, treatment)) %>%
  mutate(gene = factor(gene, levels = levels(genesincommon_71Pp$gene))) %>%
  filter(treatment != "Control") %>%
  filter(treatment != "Hyp_T2h") %>%
  filter(treatment != "Reox") %>%
  filter(treatment != "Recovery")


# Get group log2foldchanges for plotting
gl2fc <- genesincommon_71Pp %>%
  pivot_longer(-gene, names_to = "treatment", values_to = "l2fc") %>%
  mutate(treatment = gsub(".Control", "", treatment))
  
# Get group means for plotting
gcountsmean <- gcounts %>%
  group_by(gene, treatment) %>%
  summarise(count = mean(count))

# Plot
plotgcounts <- gcounts %>%
  ggplot(aes(x = treatment, y = pmax(0,log(count)))) +
  geom_jitter(width = 0.2, aes(color = treatment)) +
  geom_line(data = gcountsmean, aes(group = gene)) +
  geom_text(data = gl2fc, aes(y = 2.4, label = round(l2fc, 2))) +
  facet_wrap(~ gene) +
  theme_custom() +
  theme(legend.position = "none") +
  labs(x = "", y = "log(Variance stabilized counts)") +
  scale_y_continuous(limits = c(0, 2.6))
plotgcounts

ggsave(file = "figures/FigS_shared_genes_71Pp.png", width = 360, height = 360, units = "mm")

GO enrichment analysis

# Start with abdominal 
# Control vs 2h contrast (Rapid response)
setwd("analyses/GO_MWU/")

# Write all-batches control vs. 2h hypoxia logP values to file for GO_MWU analysis
DE_A %>%
  filter(batch == "all", num == "Hyp_T2h", den == "Control") %>%
  unnest(logP) %>%
  select(gene, logP) %>%
  write.table(., file = "A_2h.vs.Ctrl.logP.txt", row.names = FALSE, quote = FALSE, sep = ",")

# Run GO_MWU for iological Process (BP) GO terms
commandArgs <- function(...) c("A_2h.vs.Ctrl.logP.txt", "BP")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 16  GO terms at 10% FDR
# Run GO_MWU for Molecular Function GO terms
commandArgs <- function(...) c("A_2h.vs.Ctrl.logP.txt", "MF")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 49  GO terms at 10% FDR
# Run GO_MWU for Cellular Component (CC) GO terms
commandArgs <- function(...) c("A_2h.vs.Ctrl.logP.txt", "CC")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 18  GO terms at 10% FDR
# Get list of all significant GO terms for contrast
A_Hyp2h.Ctrl.GO <- as.list(
  c(BP = "MWU_BP_A_2h.vs.Ctrl.logP.txt",
    MF = "MWU_MF_A_2h.vs.Ctrl.logP.txt",
    CC = "MWU_CC_A_2h.vs.Ctrl.logP.txt")
  ) %>% map(read.table, header = T) %>%
  bind_rows(.id = "ontology") %>%
  as_tibble() %>%
  filter(p.adj < 0.01) %>%
  select(ontology, nseqs, name, delta.rank, p.adj)

# Get list of individual genes with GO annotations and p-values for contrast
allgenepvals <- as.list(
  c(BP = "BP_A_2h.vs.Ctrl.logP.txt",
    MF = "MF_A_2h.vs.Ctrl.logP.txt",
    CC = "CC_A_2h.vs.Ctrl.logP.txt")
  ) %>% map(read_tsv) %>%
  bind_rows(.id = "ontology")

# For all significant GO terms, count how many genes with that GO term have raw pvalue < 0.05 (= "nsig")
A_Hyp2h.Ctrl.GO <- A_Hyp2h.Ctrl.GO %>%
  mutate(nsig = map_dbl(name, function(.) {
    filter(allgenepvals, name == ., -abs(value) <= log10(0.05)) %>%
    nrow(.)
  }))
  
# Print table
A_Hyp2h.Ctrl.GO %>%
  unite("genes", nsig, nseqs, sep = "/") %>%
  arrange(ontology, delta.rank) %>%
  knitr::kable()
ontology genes name delta.rank p.adj
BP 22/27 carbohydrate metabolic process -611 0.0079353
BP 35/34 immune response -554 0.0073630
BP 403/795 system development -173 0.0001156
BP 156/245 RNA processing 418 0.0000000
BP 100/166 cellular component assembly 428 0.0000001
BP 9/21 spliceosomal snRNP assembly 785 0.0016750
BP 10/11 formation of cytoplasmic translation initiation complex 1171 0.0005090
CC 9/15 cornified envelope -597 0.0065812
CC 179/232 plasma membrane -388 0.0000000
CC 33/64 intrinsic component of plasma membrane -307 0.0045284
CC 96/184 integral component of membrane -189 0.0045284
CC 308/639 extracellular region part -133 0.0004904
CC 37/79 tethering complex 253 0.0096193
CC 72/162 transferase complex 265 0.0000564
CC 107/164 mitochondrion 287 0.0000098
CC 15/16 inner mitochondrial membrane protein complex 613 0.0045284
CC 51/61 peptidase complex 741 0.0000000
CC 7/6 proton-transporting two-sector ATPase complex, catalytic domain 979 0.0046386
MF 16/19 triglyceride lipase activity -1017 0.0045131
MF 14/21 N-acetylgalactosamine-4-sulfatase activity -969 0.0045131
MF 43/51 sodium ion transmembrane transporter activity -901 0.0000060
MF 20/31 sulfuric ester hydrolase activity -787 0.0045774
MF 70/91 active transmembrane transporter activity -636 0.0000263
MF 71/94 anion transmembrane transporter activity -576 0.0001355
MF 49/73 peptide receptor activity -502 0.0057614
MF 256/378 molecular transducer activity -472 0.0000000
MF 89/127 virus receptor activity -395 0.0045774
MF 230/307 ion transmembrane transporter activity -339 0.0000730
MF 170/208 cation transmembrane transporter activity -287 0.0087553
MF 303/540 extracellular matrix structural constituent 299 0.0000051
MF 71/161 opsin binding 341 0.0057614
MF 108/242 low-density lipoprotein particle receptor activity 440 0.0000038
MF 41/75 tRNA binding 483 0.0065480
MF 30/39 isomerase activity 850 0.0002991
MF 12/24 structural constituent of ribosome 852 0.0065480
MF 15/19 ATPase regulator activity 1177 0.0004958
MF 10/10 electron transfer activity 1328 0.0063691
setwd("analyses/GO_MWU/")

# Write all-batches control vs. 6h hypoxia logP values to file for GO_MWU analysis
DE_A %>%
  filter(batch == "all", num == "Hyp_T6h", den == "Control") %>%
  unnest(logP) %>%
  select(gene, logP) %>%
  write.table(., file = "A_6h.vs.Ctrl.logP.txt", row.names = FALSE, quote = FALSE, sep = ",")

# Run GO_MWU for iological Process (BP) GO terms
commandArgs <- function(...) c("A_6h.vs.Ctrl.logP.txt", "BP")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 13  GO terms at 10% FDR
# Run GO_MWU for Molecular Function GO terms
commandArgs <- function(...) c("A_6h.vs.Ctrl.logP.txt", "MF")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 47  GO terms at 10% FDR
# Run GO_MWU for Cellular Component (CC) GO terms
commandArgs <- function(...) c("A_6h.vs.Ctrl.logP.txt", "CC")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 20  GO terms at 10% FDR
# Get list of all significant GO terms for contrast
A_Hyp6h.Ctrl.GO <- as.list(
  c(BP = "MWU_BP_A_6h.vs.Ctrl.logP.txt",
    MF = "MWU_MF_A_6h.vs.Ctrl.logP.txt",
    CC = "MWU_CC_A_6h.vs.Ctrl.logP.txt")
  ) %>% map(read.table, header = T) %>%
  bind_rows(.id = "ontology") %>%
  as_tibble() %>%
  filter(p.adj < 0.01) %>%
  select(ontology, nseqs, name, delta.rank, p.adj)

# Get list of individual genes with GO annotations and p-values for contrast
allgenepvals <- as.list(
  c(BP = "BP_A_6h.vs.Ctrl.logP.txt",
    MF = "MF_A_6h.vs.Ctrl.logP.txt",
    CC = "CC_A_6h.vs.Ctrl.logP.txt")
  ) %>% map(read_tsv) %>%
  bind_rows(.id = "ontology")

# For all significant GO terms, count how many genes with that GO term have raw pvalue < 0.05 (= "nsig")
A_Hyp6h.Ctrl.GO <- A_Hyp6h.Ctrl.GO %>%
  mutate(nsig = map_dbl(name, function(.) {
    filter(allgenepvals, name == ., -abs(value) <= log10(0.05)) %>%
    nrow(.)
  }))
  
# Print table
A_Hyp6h.Ctrl.GO %>%
  unite("genes", nsig, nseqs, sep = "/") %>%
  arrange(ontology, delta.rank) %>%
  knitr::kable()
ontology genes name delta.rank p.adj
BP 36/34 immune response -634 0.0010796
BP 407/795 system development -164 0.0002707
BP 133/245 RNA processing 377 0.0000000
BP 103/166 cellular component assembly 444 0.0000000
BP 9/11 formation of cytoplasmic translation initiation complex 1239 0.0002123
CC 23/40 sperm part -432 0.0012306
CC 173/232 plasma membrane -378 0.0000000
CC 314/639 extracellular region part -112 0.0044181
CC 100/164 mitochondrion 227 0.0008607
CC 49/90 microtubule organizing center part 242 0.0097249
CC 66/162 transferase complex 262 0.0000976
CC 51/61 peptidase complex 717 0.0000000
CC 20/16 inner mitochondrial membrane protein complex 721 0.0007006
CC 7/6 proton-transporting two-sector ATPase complex, catalytic domain 992 0.0044181
MF 17/19 triglyceride lipase activity -1040 0.0025459
MF 15/21 N-acetylgalactosamine-4-sulfatase activity -1008 0.0020786
MF 39/51 sodium ion transmembrane transporter activity -913 0.0000031
MF 26/31 sulfuric ester hydrolase activity -801 0.0029983
MF 55/73 peptide receptor activity -655 0.0000870
MF 78/91 active transmembrane transporter activity -654 0.0000104
MF 69/94 anion transmembrane transporter activity -618 0.0000261
MF 270/378 molecular transducer activity -548 0.0000000
MF 53/79 monooxygenase activity -470 0.0063296
MF 87/127 inorganic anion transmembrane transporter activity -415 0.0020786
MF 237/307 ion transmembrane transporter activity -390 0.0000031
MF 116/158 channel activity -347 0.0046695
MF 170/208 cation transmembrane transporter activity -336 0.0016677
MF 268/540 extracellular matrix structural constituent 370 0.0000000
MF 110/242 low-density lipoprotein particle receptor activity 431 0.0000031
MF 38/75 tRNA binding 517 0.0029983
MF 37/39 isomerase activity 930 0.0000372
MF 10/10 electron transfer activity 1326 0.0057562
MF 17/19 ATPase regulator activity 1352 0.0000279
# Start with abdominal 

setwd("analyses/GO_MWU/")

# Write all-batches control vs. 6days repeated hypoxia logP values to file for GO_MWU analysis
DE_A %>%
  filter(batch == "all", num == "LtH_6", den == "Control") %>%
  unnest(logP) %>%
  select(gene, logP) %>%
  write.table(., file = "A_6d.vs.Ctrl.logP.txt", row.names = FALSE, quote = FALSE, sep = ",")

# Run GO_MWU for iological Process (BP) GO terms
commandArgs <- function(...) c("A_6d.vs.Ctrl.logP.txt", "BP")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 22  GO terms at 10% FDR
# Run GO_MWU for Molecular Function GO terms
commandArgs <- function(...) c("A_6d.vs.Ctrl.logP.txt", "MF")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 43  GO terms at 10% FDR
# Run GO_MWU for Cellular Component (CC) GO terms
commandArgs <- function(...) c("A_6d.vs.Ctrl.logP.txt", "CC")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 19  GO terms at 10% FDR
# Get list of all significant GO terms for contrast
A_Hyp6d.Ctrl.GO <- as.list(
  c(BP = "MWU_BP_A_6d.vs.Ctrl.logP.txt",
    MF = "MWU_MF_A_6d.vs.Ctrl.logP.txt",
    CC = "MWU_CC_A_6d.vs.Ctrl.logP.txt")
  ) %>% map(read.table, header = T) %>%
  bind_rows(.id = "ontology") %>%
  as_tibble() %>%
  filter(p.adj < 0.01) %>%
  select(ontology, nseqs, name, delta.rank, p.adj)

# Get list of individual genes with GO annotations and p-values for contrast
allgenepvals <- as.list(
  c(BP = "BP_A_6d.vs.Ctrl.logP.txt",
    MF = "MF_A_6d.vs.Ctrl.logP.txt",
    CC = "CC_A_6d.vs.Ctrl.logP.txt")
  ) %>% map(read_tsv) %>%
  bind_rows(.id = "ontology")

# For all significant GO terms, count how many genes with that GO term have raw pvalue < 0.05 (= "nsig")
A_Hyp6d.Ctrl.GO <- A_Hyp6d.Ctrl.GO %>%
  mutate(nsig = map_dbl(name, function(.) {
    filter(allgenepvals, name == ., -abs(value) <= log10(0.05)) %>%
    nrow(.)
  }))
  
# Print table
A_Hyp6d.Ctrl.GO %>%
  unite("genes", nsig, nseqs, sep = "/") %>%
  arrange(ontology, delta.rank) %>%
  knitr::kable()
ontology genes name delta.rank p.adj
BP 34/34 immune response -620 0.0011300
BP 21/27 carbohydrate metabolic process -606 0.0061546
BP 34/48 regulation of neurotransmitter levels -515 0.0012789
BP 33/46 regulation of immune response -468 0.0061546
BP 380/795 system development -195 0.0000052
BP 53/117 cellular amide metabolic process 291 0.0068637
BP 27/79 ncRNA metabolic process 439 0.0004644
BP 98/166 cellular component assembly 489 0.0000000
BP 145/245 RNA processing 553 0.0000000
BP 13/21 maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA) 663 0.0079126
BP 9/21 spliceosomal snRNP assembly 796 0.0011130
BP 11/11 formation of cytoplasmic translation initiation complex 1403 0.0000075
CC 161/232 plasma membrane -436 0.0000000
CC 44/64 integral component of plasma membrane -377 0.0003029
CC 30/40 sperm part -368 0.0059264
CC 126/184 integral component of membrane -203 0.0015531
CC 313/639 extracellular region part -112 0.0033233
CC 64/90 microtubule organizing center part 265 0.0033233
CC 35/79 tethering complex 280 0.0033233
CC 63/162 transferase complex 281 0.0000144
CC 109/164 mitochondrion 283 0.0000139
CC 15/16 inner mitochondrial membrane protein complex 688 0.0010278
CC 47/61 peptidase complex 774 0.0000000
MF 41/51 sodium ion transmembrane transporter activity -947 0.0000017
MF 13/21 N-acetylgalactosamine-4-sulfatase activity -872 0.0091488
MF 52/73 peptide receptor activity -779 0.0000019
MF 21/31 sulfuric ester hydrolase activity -743 0.0067349
MF 64/91 active transmembrane transporter activity -700 0.0000019
MF 62/94 anion transmembrane transporter activity -642 0.0000088
MF 273/378 molecular transducer activity -631 0.0000000
MF 215/307 ion transmembrane transporter activity -409 0.0000011
MF 77/127 inorganic anion transmembrane transporter activity -385 0.0044930
MF 163/208 cation transmembrane transporter activity -373 0.0002583
MF 114/158 channel activity -363 0.0025880
MF 248/540 extracellular matrix structural constituent 254 0.0001028
MF 71/161 opsin binding 321 0.0091488
MF 107/242 low-density lipoprotein particle receptor activity 414 0.0000066
MF 37/75 tRNA binding 710 0.0000100
MF 15/28 pyruvate carboxylase activity 853 0.0025880
MF 15/24 structural constituent of ribosome 917 0.0025880
MF 35/39 isomerase activity 1040 0.0000028
MF 8/19 ATPase regulator activity 1169 0.0004225
MF 11/11 intramolecular oxidoreductase activity, transposing S-S bonds 1302 0.0041709
MF 10/10 electron transfer activity 1428 0.0025880
MF 5/5 ATPase activity 1766 0.0097926
# Start with abdominal 

setwd("analyses/GO_MWU/")

# Write all-batches control vs. 7d hypoxia logP values to file for GO_MWU analysis
DE_A %>%
  filter(batch == "all", num == "LtH_7", den == "Control") %>%
  unnest(logP) %>%
  select(gene, logP) %>%
  write.table(., file = "A_7d.vs.Ctrl.logP.txt", row.names = FALSE, quote = FALSE, sep = ",")

# Run GO_MWU for iological Process (BP) GO terms
commandArgs <- function(...) c("A_7d.vs.Ctrl.logP.txt", "BP")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 25  GO terms at 10% FDR
# Run GO_MWU for Molecular Function GO terms
commandArgs <- function(...) c("A_7d.vs.Ctrl.logP.txt", "MF")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 53  GO terms at 10% FDR
# Run GO_MWU for Cellular Component (CC) GO terms
commandArgs <- function(...) c("A_7d.vs.Ctrl.logP.txt", "CC")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 19  GO terms at 10% FDR
# Get list of all significant GO terms for contrast
A_Hyp7d.Ctrl.GO <- as.list(
  c(BP = "MWU_BP_A_7d.vs.Ctrl.logP.txt",
    MF = "MWU_MF_A_7d.vs.Ctrl.logP.txt",
    CC = "MWU_CC_A_7d.vs.Ctrl.logP.txt")
  ) %>% map(read.table, header = T) %>%
  bind_rows(.id = "ontology") %>%
  as_tibble() %>%
  filter(p.adj < 0.01) %>%
  select(ontology, nseqs, name, delta.rank, p.adj)

# Get list of individual genes with GO annotations and p-values for contrast
allgenepvals <- as.list(
  c(BP = "BP_A_7d.vs.Ctrl.logP.txt",
    MF = "MF_A_7d.vs.Ctrl.logP.txt",
    CC = "CC_A_7d.vs.Ctrl.logP.txt")
  ) %>% map(read_tsv) %>%
  bind_rows(.id = "ontology")

# For all significant GO terms, count how many genes with that GO term have raw pvalue < 0.05 (= "nsig")
A_Hyp7d.Ctrl.GO <- A_Hyp7d.Ctrl.GO %>%
  mutate(nsig = map_dbl(name, function(.) {
    filter(allgenepvals, name == ., -abs(value) <= log10(0.05)) %>%
    nrow(.)
  }))
  
# Print table
A_Hyp7d.Ctrl.GO %>%
  unite("genes", nsig, nseqs, sep = "/") %>%
  arrange(ontology, delta.rank) %>%
  knitr::kable()
ontology genes name delta.rank p.adj
BP 23/34 immune response -532 0.0063754
BP 34/48 regulation of neurotransmitter levels -520 0.0011092
BP 30/46 regulation of immune response -453 0.0066880
BP 42/56 regulation of response to stimulus -395 0.0094048
BP 95/122 localization -289 0.0057851
BP 379/795 system development -186 0.0000152
BP 495/1091 lipid metabolic process -119 0.0051896
BP 42/79 ncRNA processing 445 0.0002958
BP 161/245 RNA processing 553 0.0000000
BP 133/166 cellular component assembly 565 0.0000000
BP 110/117 cellular amide metabolic process 568 0.0000000
BP 16/21 maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA) 642 0.0094048
BP 16/21 spliceosomal snRNP assembly 843 0.0003303
BP 9/11 ribosomal small subunit assembly 1045 0.0016524
BP 11/11 formation of cytoplasmic translation initiation complex 1340 0.0000200
CC 166/232 plasma membrane -397 0.0000000
CC 50/64 integral component of plasma membrane -354 0.0007536
CC 123/184 integral component of membrane -202 0.0016732
CC 83/162 transferase complex 200 0.0036896
CC 107/164 mitochondrion 333 0.0000001
CC 77/90 microtubule organizing center part 342 0.0000782
CC 44/61 peptidase complex 709 0.0000000
CC 17/16 inner mitochondrial membrane protein complex 840 0.0000311
CC 6/6 proton-transporting two-sector ATPase complex, catalytic domain 932 0.0083501
MF 16/19 triglyceride lipase activity -1120 0.0007217
MF 58/73 peptide receptor activity -905 0.0000000
MF 21/27 neurotransmitter receptor activity -904 0.0013142
MF 36/51 sodium ion transmembrane transporter activity -845 0.0000169
MF 60/91 active transmembrane transporter activity -687 0.0000023
MF 292/378 molecular transducer activity -648 0.0000000
MF 53/94 anion transmembrane transporter activity -616 0.0000222
MF 131/158 channel activity -573 0.0000002
MF 226/307 ion transmembrane transporter activity -516 0.0000000
MF 178/208 cation transmembrane transporter activity -484 0.0000006
MF 61/79 voltage-gated ion channel activity -466 0.0061579
MF 74/127 inorganic anion transmembrane transporter activity -376 0.0051134
MF 262/540 extracellular matrix structural constituent 261 0.0000506
MF 191/294 oxidoreductase activity 271 0.0022608
MF 60/161 opsin binding 360 0.0022608
MF 125/242 low-density lipoprotein particle receptor activity 492 0.0000000
MF 45/75 tRNA binding 603 0.0002988
MF 14/28 pyruvate carboxylase activity 798 0.0048855
MF 27/39 isomerase activity 928 0.0000353
MF 13/19 ATPase regulator activity 998 0.0034149
MF 12/24 structural constituent of ribosome 1019 0.0005311
MF 10/10 electron transfer activity 1615 0.0003643
# Start with abdominal 

setwd("analyses/GO_MWU/")

# Write all-batches control vs. Recovery hypoxia logP values to file for GO_MWU analysis
DE_A %>%
  filter(batch == "all", num == "Recovery", den == "Control") %>%
  unnest(logP) %>%
  select(gene, logP) %>%
  write.table(., file = "A_Recovery.vs.Ctrl.logP.txt", row.names = FALSE, quote = FALSE, sep = ",")

# Run GO_MWU for iological Process (BP) GO terms
commandArgs <- function(...) c("A_Recovery.vs.Ctrl.logP.txt", "BP")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 12  GO terms at 10% FDR
# Run GO_MWU for Molecular Function GO terms
commandArgs <- function(...) c("A_Recovery.vs.Ctrl.logP.txt", "MF")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 42  GO terms at 10% FDR
# Run GO_MWU for Cellular Component (CC) GO terms
commandArgs <- function(...) c("A_Recovery.vs.Ctrl.logP.txt", "CC")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 18  GO terms at 10% FDR
# Get list of all significant GO terms for contrast
A_HypRecovery.Ctrl.GO <- as.list(
  c(BP = "MWU_BP_A_Recovery.vs.Ctrl.logP.txt",
    MF = "MWU_MF_A_Recovery.vs.Ctrl.logP.txt",
    CC = "MWU_CC_A_Recovery.vs.Ctrl.logP.txt")
  ) %>% map(read.table, header = T) %>%
  bind_rows(.id = "ontology") %>%
  as_tibble() %>%
  filter(p.adj < 0.01) %>%
  select(ontology, nseqs, name, delta.rank, p.adj)

# Get list of individual genes with GO annotations and p-values for contrast
allgenepvals <- as.list(
  c(BP = "BP_A_Recovery.vs.Ctrl.logP.txt",
    MF = "MF_A_Recovery.vs.Ctrl.logP.txt",
    CC = "CC_A_Recovery.vs.Ctrl.logP.txt")
  ) %>% map(read_tsv) %>%
  bind_rows(.id = "ontology")

# For all significant GO terms, count how many genes with that GO term have raw pvalue < 0.05 (= "nsig")
A_HypRecovery.Ctrl.GO <- A_HypRecovery.Ctrl.GO %>%
  mutate(nsig = map_dbl(name, function(.) {
    filter(allgenepvals, name == ., -abs(value) <= log10(0.05)) %>%
    nrow(.)
  }))
  
# Print table
A_HypRecovery.Ctrl.GO %>%
  unite("genes", nsig, nseqs, sep = "/") %>%
  arrange(ontology, delta.rank) %>%
  knitr::kable()
ontology genes name delta.rank p.adj
BP 37/34 immune response -642 0.0014686
BP 93/245 RNA processing 292 0.0000650
BP 89/166 cellular component assembly 418 0.0000007
CC 172/232 plasma membrane -409 0.0000000
CC 36/64 integral component of plasma membrane -374 0.0004235
CC 99/184 integral component of membrane -189 0.0042062
CC 265/639 extracellular region part -123 0.0017606
CC 34/79 tethering complex 287 0.0038662
CC 59/162 transferase complex 320 0.0000007
CC 20/61 peptidase complex 424 0.0000734
MF 45/51 sodium ion transmembrane transporter activity -999 0.0000003
MF 13/21 N-acetylgalactosamine-4-sulfatase activity -955 0.0042432
MF 14/19 triglyceride lipase activity -939 0.0084033
MF 20/31 nucleobase-containing compound transmembrane transporter activity -857 0.0013741
MF 16/31 sulfuric ester hydrolase activity -747 0.0078907
MF 92/91 active transmembrane transporter activity -662 0.0000117
MF 42/73 peptide receptor activity -632 0.0002052
MF 61/94 anion transmembrane transporter activity -596 0.0000732
MF 241/378 molecular transducer activity -545 0.0000000
MF 42/62 serine-type peptidase activity -514 0.0096787
MF 52/79 monooxygenase activity -464 0.0084033
MF 207/307 ion transmembrane transporter activity -358 0.0000222
MF 132/208 cation transmembrane transporter activity -343 0.0011874
MF 57/161 opsin binding 352 0.0042432
MF 87/242 low-density lipoprotein particle receptor activity 371 0.0000903
MF 226/540 extracellular matrix structural constituent 391 0.0000000
MF 28/39 isomerase activity 900 0.0000903
MF 15/19 ATPase regulator activity 1134 0.0008771
# Start with pleural/pedal 
# Control vs 2h contrast (Rapid response)
setwd("analyses/GO_MWU/")

# Write all-batches control vs. 2h hypoxia logP values to file for GO_MWU analysis
DE_Pp %>%
  filter(batch == "all", num == "Hyp_T2h", den == "Control") %>%
  unnest(logP) %>%
  select(gene, logP) %>%
  write.table(., file = "P_2h.vs.Ctrl.logP.txt", row.names = FALSE, quote = FALSE, sep = ",")

# Run GO_MWU for iological Process (BP) GO terms
commandArgs <- function(...) c("P_2h.vs.Ctrl.logP.txt", "BP")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 8  GO terms at 10% FDR
# Run GO_MWU for Molecular Function GO terms
commandArgs <- function(...) c("P_2h.vs.Ctrl.logP.txt", "MF")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 2  GO terms at 10% FDR
# Run GO_MWU for Cellular Component (CC) GO terms
commandArgs <- function(...) c("P_2h.vs.Ctrl.logP.txt", "CC")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 3  GO terms at 10% FDR
# Get list of all significant GO terms for contrast
P_Hyp2h.Ctrl.GO <- as.list(
  c(BP = "MWU_BP_P_2h.vs.Ctrl.logP.txt",
    MF = "MWU_MF_P_2h.vs.Ctrl.logP.txt",
    CC = "MWU_CC_P_2h.vs.Ctrl.logP.txt")
  ) %>% map(read.table, header = T) %>%
  bind_rows(.id = "ontology") %>%
  as_tibble() %>%
  filter(p.adj < 0.01) %>%
  select(ontology, nseqs, name, delta.rank, p.adj)

# Get list of individual genes with GO annotations and p-values for contrast
allgenepvals <- as.list(
  c(BP = "BP_P_2h.vs.Ctrl.logP.txt",
    MF = "MF_P_2h.vs.Ctrl.logP.txt",
    CC = "CC_P_2h.vs.Ctrl.logP.txt")
  ) %>% map(read_tsv) %>%
  bind_rows(.id = "ontology")

# For all significant GO terms, count how many genes with that GO term have raw pvalue < 0.05 (= "nsig")
P_Hyp2h.Ctrl.GO <- P_Hyp2h.Ctrl.GO %>%
  mutate(nsig = map_dbl(name, function(.) {
    filter(allgenepvals, name == ., -abs(value) <= log10(0.05)) %>%
    nrow(.)
  }))
  
# Print table
P_Hyp2h.Ctrl.GO %>%
  unite("genes", nsig, nseqs, sep = "/") %>%
  arrange(ontology, delta.rank) %>%
  knitr::kable()
ontology genes name delta.rank p.adj
BP 5/11 ribosomal small subunit assembly -1012 0.0084032
BP 67/115 cellular amide metabolic process -588 0.0000000
CC 18/61 peptidase complex 509 0.0000008
CC 9/23 phosphatase complex 533 0.0069969
setwd("analyses/GO_MWU/")

# Write all-batches control vs. 6h hypoxia logP values to file for GO_MWU analysis
DE_Pp %>%
  filter(batch == "all", num == "Hyp_T6h", den == "Control") %>%
  unnest(logP) %>%
  select(gene, logP) %>%
  write.table(., file = "P_6h.vs.Ctrl.logP.txt", row.names = FALSE, quote = FALSE, sep = ",")

# Run GO_MWU for iological Process (BP) GO terms
commandArgs <- function(...) c("P_6h.vs.Ctrl.logP.txt", "BP")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 7  GO terms at 10% FDR
# Run GO_MWU for Molecular Function GO terms
commandArgs <- function(...) c("P_6h.vs.Ctrl.logP.txt", "MF")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 2  GO terms at 10% FDR
# Run GO_MWU for Cellular Component (CC) GO terms
commandArgs <- function(...) c("P_6h.vs.Ctrl.logP.txt", "CC")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 3  GO terms at 10% FDR
# Get list of all significant GO terms for contrast
P_Hyp6h.Ctrl.GO <- as.list(
  c(BP = "MWU_BP_P_6h.vs.Ctrl.logP.txt",
    MF = "MWU_MF_P_6h.vs.Ctrl.logP.txt",
    CC = "MWU_CC_P_6h.vs.Ctrl.logP.txt")
  ) %>% map(read.table, header = T) %>%
  bind_rows(.id = "ontology") %>%
  as_tibble() %>%
  filter(p.adj < 0.01) %>%
  select(ontology, nseqs, name, delta.rank, p.adj)

# Get list of individual genes with GO annotations and p-values for contrast
allgenepvals <- as.list(
  c(BP = "BP_P_6h.vs.Ctrl.logP.txt",
    MF = "MF_P_6h.vs.Ctrl.logP.txt",
    CC = "CC_P_6h.vs.Ctrl.logP.txt")
  ) %>% map(read_tsv) %>%
  bind_rows(.id = "ontology")

# For all significant GO terms, count how many genes with that GO term have raw pvalue < 0.05 (= "nsig")
P_Hyp6h.Ctrl.GO <- P_Hyp6h.Ctrl.GO %>%
  mutate(nsig = map_dbl(name, function(.) {
    filter(allgenepvals, name == ., -abs(value) <= log10(0.05)) %>%
    nrow(.)
  }))
  
# Print table
P_Hyp6h.Ctrl.GO %>%
  unite("genes", nsig, nseqs, sep = "/") %>%
  arrange(ontology, delta.rank) %>%
  knitr::kable()
ontology genes name delta.rank p.adj
BP 45/115 cellular amide metabolic process -491 0.0000003
BP 22/80 ncRNA processing -388 0.0039914
BP 89/245 RNA processing -364 0.0000001
CC 9/23 protein serine/threonine phosphatase complex 610 0.0015012
# Start with pleural/pedal 

setwd("analyses/GO_MWU/")

# Write all-batches control vs. 6days repeated hypoxia logP values to file for GO_MWU analysis
DE_Pp %>%
  filter(batch == "all", num == "LtH_6", den == "Control") %>%
  unnest(logP) %>%
  select(gene, logP) %>%
  write.table(., file = "P_6d.vs.Ctrl.logP.txt", row.names = FALSE, quote = FALSE, sep = ",")

# Run GO_MWU for iological Process (BP) GO terms
commandArgs <- function(...) c("P_6d.vs.Ctrl.logP.txt", "BP")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 11  GO terms at 10% FDR
# Run GO_MWU for Molecular Function GO terms
commandArgs <- function(...) c("P_6d.vs.Ctrl.logP.txt", "MF")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 18  GO terms at 10% FDR
# Run GO_MWU for Cellular Component (CC) GO terms
commandArgs <- function(...) c("P_6d.vs.Ctrl.logP.txt", "CC")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 6  GO terms at 10% FDR
# Get list of all significant GO terms for contrast
P_Hyp6d.Ctrl.GO <- as.list(
  c(BP = "MWU_BP_P_6d.vs.Ctrl.logP.txt",
    MF = "MWU_MF_P_6d.vs.Ctrl.logP.txt",
    CC = "MWU_CC_P_6d.vs.Ctrl.logP.txt")
  ) %>% map(read.table, header = T) %>%
  bind_rows(.id = "ontology") %>%
  as_tibble() %>%
  filter(p.adj < 0.01) %>%
  select(ontology, nseqs, name, delta.rank, p.adj)

# Get list of individual genes with GO annotations and p-values for contrast
allgenepvals <- as.list(
  c(BP = "BP_P_6d.vs.Ctrl.logP.txt",
    MF = "MF_P_6d.vs.Ctrl.logP.txt",
    CC = "CC_P_6d.vs.Ctrl.logP.txt")
  ) %>% map(read_tsv) %>%
  bind_rows(.id = "ontology")

# For all significant GO terms, count how many genes with that GO term have raw pvalue < 0.05 (= "nsig")
P_Hyp6d.Ctrl.GO <- P_Hyp6d.Ctrl.GO %>%
  mutate(nsig = map_dbl(name, function(.) {
    filter(allgenepvals, name == ., -abs(value) <= log10(0.05)) %>%
    nrow(.)
  }))
  
# Print table
P_Hyp6d.Ctrl.GO %>%
  unite("genes", nsig, nseqs, sep = "/") %>%
  arrange(ontology, delta.rank) %>%
  knitr::kable()
ontology genes name delta.rank p.adj
BP 6/11 ribosomal small subunit assembly -1219 0.0001326
BP 8/13 maturation of LSU-rRNA -863 0.0057349
BP 11/21 maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA) -752 0.0018653
BP 51/115 cellular amide metabolic process -504 0.0000001
BP 27/80 ncRNA processing -425 0.0004726
BP 76/167 cellular component assembly -395 0.0000007
BP 109/245 RNA processing -386 0.0000000
BP 191/1077 lipid metabolic process 124 0.0034794
BP 84/542 regulation of multicellular organismal process 147 0.0057349
CC 4/6 proton-transporting two-sector ATPase complex, catalytic domain -952 0.0090577
CC 11/16 inner mitochondrial membrane protein complex -746 0.0005015
CC 13/61 peptidase complex -305 0.0090577
CC 47/162 mitochondrion -262 0.0001965
CC 60/226 plasma membrane 164 0.0090577
MF 106/347 molecular transducer activity 301 0.0007832
MF 58/157 passive transmembrane transporter activity 378 0.0065812
# Start with pleural/pedal 

setwd("analyses/GO_MWU/")

# Write all-batches control vs. 7d hypoxia logP values to file for GO_MWU analysis
DE_Pp %>%
  filter(batch == "all", num == "LtH_7", den == "Control") %>%
  unnest(logP) %>%
  select(gene, logP) %>%
  write.table(., file = "P_7d.vs.Ctrl.logP.txt", row.names = FALSE, quote = FALSE, sep = ",")

# Run GO_MWU for iological Process (BP) GO terms
commandArgs <- function(...) c("P_7d.vs.Ctrl.logP.txt", "BP")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 11  GO terms at 10% FDR
# Run GO_MWU for Molecular Function GO terms
commandArgs <- function(...) c("P_7d.vs.Ctrl.logP.txt", "MF")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 13  GO terms at 10% FDR
# Run GO_MWU for Cellular Component (CC) GO terms
commandArgs <- function(...) c("P_7d.vs.Ctrl.logP.txt", "CC")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 8  GO terms at 10% FDR
# Get list of all significant GO terms for contrast
P_Hyp7d.Ctrl.GO <- as.list(
  c(BP = "MWU_BP_P_7d.vs.Ctrl.logP.txt",
    MF = "MWU_MF_P_7d.vs.Ctrl.logP.txt",
    CC = "MWU_CC_P_7d.vs.Ctrl.logP.txt")
  ) %>% map(read.table, header = T) %>%
  bind_rows(.id = "ontology") %>%
  as_tibble() %>%
  filter(p.adj < 0.01) %>%
  select(ontology, nseqs, name, delta.rank, p.adj)

# Get list of individual genes with GO annotations and p-values for contrast
allgenepvals <- as.list(
  c(BP = "BP_P_7d.vs.Ctrl.logP.txt",
    MF = "MF_P_7d.vs.Ctrl.logP.txt",
    CC = "CC_P_7d.vs.Ctrl.logP.txt")
  ) %>% map(read_tsv) %>%
  bind_rows(.id = "ontology")

# For all significant GO terms, count how many genes with that GO term have raw pvalue < 0.05 (= "nsig")
P_Hyp7d.Ctrl.GO <- P_Hyp7d.Ctrl.GO %>%
  mutate(nsig = map_dbl(name, function(.) {
    filter(allgenepvals, name == ., -abs(value) <= log10(0.05)) %>%
    nrow(.)
  }))
  
# Print table
P_Hyp7d.Ctrl.GO %>%
  unite("genes", nsig, nseqs, sep = "/") %>%
  arrange(ontology, delta.rank) %>%
  knitr::kable()
ontology genes name delta.rank p.adj
BP 7/21 maturation of SSU-rRNA -713 0.0063902
BP 15/80 ncRNA processing -438 0.0004304
BP 53/167 cellular component assembly -402 0.0000006
BP 70/245 RNA processing -368 0.0000000
BP 150/1077 lipid metabolic process 118 0.0094546
MF 62/298 ion transmembrane transporter activity 260 0.0079972
MF 49/192 metal ion transmembrane transporter activity 357 0.0027573
MF 50/347 molecular transducer activity 368 0.0000034
MF 55/157 channel activity 388 0.0027573
MF 16/71 peptide receptor activity 524 0.0079972
# Start with pleural/pedal 

setwd("analyses/GO_MWU/")

# Write all-batches control vs. Recovery hypoxia logP values to file for GO_MWU analysis
DE_Pp %>%
  filter(batch == "all", num == "Recovery", den == "Control") %>%
  unnest(logP) %>%
  select(gene, logP) %>%
  write.table(., file = "P_Recovery.vs.Ctrl.logP.txt", row.names = FALSE, quote = FALSE, sep = ",")

# Run GO_MWU for iological Process (BP) GO terms
commandArgs <- function(...) c("P_Recovery.vs.Ctrl.logP.txt", "BP")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 8  GO terms at 10% FDR
# Run GO_MWU for Molecular Function GO terms
commandArgs <- function(...) c("P_Recovery.vs.Ctrl.logP.txt", "MF")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 0  GO terms at 10% FDR
# Run GO_MWU for Cellular Component (CC) GO terms
commandArgs <- function(...) c("P_Recovery.vs.Ctrl.logP.txt", "CC")
source("GO_MWU.R")
## Continuous measure of interest: will perform MWU test
## 0  GO terms at 10% FDR
# Get list of all significant GO terms for contrast
P_HypRecovery.Ctrl.GO <- as.list(
  c(BP = "MWU_BP_P_Recovery.vs.Ctrl.logP.txt",
    MF = "MWU_MF_P_Recovery.vs.Ctrl.logP.txt",
    CC = "MWU_CC_P_Recovery.vs.Ctrl.logP.txt")
  ) %>% map(read.table, header = T) %>%
  bind_rows(.id = "ontology") %>%
  as_tibble() %>%
  filter(p.adj < 0.01) %>%
  select(ontology, nseqs, name, delta.rank, p.adj)

# Get list of individual genes with GO annotations and p-values for contrast
allgenepvals <- as.list(
  c(BP = "BP_P_Recovery.vs.Ctrl.logP.txt",
    MF = "MF_P_Recovery.vs.Ctrl.logP.txt",
    CC = "CC_P_Recovery.vs.Ctrl.logP.txt")
  ) %>% map(read_tsv) %>%
  bind_rows(.id = "ontology")

# For all significant GO terms, count how many genes with that GO term have raw pvalue < 0.05 (= "nsig")
P_HypRecovery.Ctrl.GO <- P_HypRecovery.Ctrl.GO %>%
  mutate(nsig = map_dbl(name, function(.) {
    filter(allgenepvals, name == ., -abs(value) <= log10(0.05)) %>%
    nrow(.)
  }))
  
# Print table
P_HypRecovery.Ctrl.GO %>%
  unite("genes", nsig, nseqs, sep = "/") %>%
  arrange(ontology, delta.rank) %>%
  knitr::kable()
ontology genes name delta.rank p.adj
BP 3/13 maturation of LSU-rRNA -891 0.0064047
BP 14/80 ncRNA processing -427 0.0005356
BP 25/115 cellular amide metabolic process -368 0.0004182
BP 52/245 RNA processing -294 0.0000466
BP 145/1077 lipid metabolic process 165 0.0000466